In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

In [6]:
# Load the Titanic training set from the project-relative CSV file.
train_csv_path = 'titanic/train.csv'
train_data = pd.read_csv(train_csv_path)

In [7]:
# Display the training frame exactly as it was read from the CSV.
train_data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [9]:
# Fill the gaps in each column with a statistic taken from that same column:
# the median for Age and Fare, the most frequent value for Embarked.
fill_values = {
    'Age': train_data['Age'].median(),
    'Fare': train_data['Fare'].median(),
    'Embarked': train_data['Embarked'].mode()[0],
}
for column_name, replacement in fill_values.items():
    train_data[column_name] = train_data[column_name].fillna(replacement)

In [11]:
# One-hot encode the two categorical columns. drop_first=True omits the first
# level of each, so the remaining indicator columns carry no redundant column.
categorical_columns = ['Sex', 'Embarked']
train_data = pd.get_dummies(
    train_data,
    columns=categorical_columns,
    drop_first=True,
)


In [12]:
# Display the training frame after imputation and one-hot encoding.
train_data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Age,SibSp,Parch,Ticket,Fare,Cabin,Sex_male,Embarked_Q,Embarked_S
0,1,0,3,"Braund, Mr. Owen Harris",22.0,1,0,A/5 21171,7.2500,,True,False,True
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0,1,0,PC 17599,71.2833,C85,False,False,False
2,3,1,3,"Heikkinen, Miss. Laina",26.0,0,0,STON/O2. 3101282,7.9250,,False,False,True
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,1,0,113803,53.1000,C123,False,False,True
4,5,0,3,"Allen, Mr. William Henry",35.0,0,0,373450,8.0500,,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",27.0,0,0,211536,13.0000,,True,False,True
887,888,1,1,"Graham, Miss. Margaret Edith",19.0,0,0,112053,30.0000,B42,False,False,True
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",28.0,1,2,W./C. 6607,23.4500,,False,False,True
889,890,1,1,"Behr, Mr. Karl Howell",26.0,0,0,111369,30.0000,C148,True,False,False


In [13]:
# Regression task: Fare is the target, the listed passenger attributes are the inputs.
regression_features = [
    'Age', 'Pclass', 'SibSp', 'Parch',
    'Sex_male', 'Embarked_Q', 'Embarked_S',
]
y_reg = train_data['Fare']
X_reg = train_data[regression_features]

In [14]:
# Classification task: Survived is the target; Fare is used as an input here.
classification_features = [
    'Age', 'Pclass', 'SibSp', 'Parch', 'Fare',
    'Sex_male', 'Embarked_Q', 'Embarked_S',
]
y_class = train_data['Survived']
X_class = train_data[classification_features]

In [15]:
# Hold out 20% of the rows for the regression task; the fixed seed makes the split repeatable.
regression_split = train_test_split(
    X_reg,
    y_reg,
    test_size=0.2,
    random_state=42,
)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = regression_split

In [16]:
# Hold out 20% of the rows for the classification task; the fixed seed makes the split repeatable.
classification_split = train_test_split(
    X_class,
    y_class,
    test_size=0.2,
    random_state=42,
)
X_train_class, X_test_class, y_train_class, y_test_class = classification_split

# 1 ერთცვლადიანი რეგრესია

In [17]:
# Univariate linear regression: Age is the only input column.
# The double brackets keep the selection two-dimensional (a one-column frame).
simple_model = LinearRegression()
age_only_train = X_train_reg[['Age']]
simple_model.fit(age_only_train, y_train_reg)

# 2 მრავალცვლადიანი რეგრესია

In [18]:
# Multivariate linear regression fitted on every column of the regression training split.
# fit() returns the estimator itself, so it can be chained; the trailing
# expression shows the fitted model as the cell output.
multi_model = LinearRegression().fit(X_train_reg, y_train_reg)
multi_model

# 3 გადაწყვეტილების ხის რეგრესია

In [19]:
# Decision-tree regressor on the same regression training split.
# random_state pins the estimator's randomness so repeated runs build the same tree.
tree_regressor = DecisionTreeRegressor(random_state=42)
tree_regressor.fit(X=X_train_reg, y=y_train_reg)

# 4 ლოგისტიკური რეგრესია

In [20]:
# Logistic regression on the classification training split, with the solver's
# iteration cap set to 1000.
# fit() returns the estimator itself; the trailing expression shows it as the cell output.
logistic_model = LogisticRegression(max_iter=1000).fit(X_train_class, y_train_class)
logistic_model

# 5 გადაწყვეტილების ხის კლასიფიკაციის მოდელი

In [21]:
# Decision-tree classifier on the classification training split.
# random_state pins the estimator's randomness so repeated runs build the same tree.
tree_classifier = DecisionTreeClassifier(random_state=42)
tree_classifier.fit(X=X_train_class, y=y_train_class)

# დატესტვა

In [22]:
# Load the Titanic test set from the project-relative CSV file.
test_csv_path = 'titanic/test.csv'
test_data = pd.read_csv(test_csv_path)

In [23]:
# Impute the numeric gaps in the test set with the medians of the TRAINING data,
# so the test features go through the same transformation the models were
# fitted on instead of one derived from the test set itself.
# train_data['Age'] and train_data['Fare'] were already filled with their own
# medians earlier, which leaves those medians unchanged.
test_data['Age'] = test_data['Age'].fillna(train_data['Age'].median())
test_data['Fare'] = test_data['Fare'].fillna(train_data['Fare'].median())
# train_data no longer has a raw 'Embarked' column at this point (it was one-hot
# encoded), so the most frequent port is still taken from the test column.
test_data['Embarked'] = test_data['Embarked'].fillna(test_data['Embarked'].mode()[0])

In [24]:
# Apply the same one-hot encoding to the test set as was applied to the training set:
# same columns, and drop_first=True omits the first level of each.
test_categorical_columns = ['Sex', 'Embarked']
test_data = pd.get_dummies(
    test_data,
    columns=test_categorical_columns,
    drop_first=True,
)

In [37]:
# Feature columns, in the same order as the classification feature matrix (includes 'Fare').
required_columns = [
    'Age', 'Pclass', 'SibSp', 'Parch', 'Fare',
    'Sex_male', 'Embarked_Q', 'Embarked_S',
]


In [31]:
# Predict with the Age-only model; the input must be the same single-column frame shape used in fit().
age_only_test = test_data[['Age']]
simple_predictions = simple_model.predict(age_only_test)
# Show only the first ten predictions.
print(f'Simple Linear Regression Predictions: {simple_predictions[:10]}')

Simple Linear Regression Predictions: [34.50365904 39.02931449 44.46010103 31.78826577 29.97800359 27.08158411
 32.87442308 31.42621334 28.52979385 29.61595116]


In [32]:
# multi_model was fitted on X_train_reg, which has no 'Fare' column (Fare is the
# regression target). required_columns includes 'Fare', so selecting it here
# would hand predict() a feature the model never saw and fail on a fresh run.
# Select exactly the columns the model was trained on, in the same order.
regression_columns = list(X_train_reg.columns)
multi_predictions = multi_model.predict(test_data[regression_columns])
# Show only the first ten predictions.
print(f'Multiple Linear Regression Predictions: {multi_predictions[:10]}')

Multiple Linear Regression Predictions: [ 3.77681557  4.93961967 35.51127703 -2.87298644 17.79897952 -1.83383632
  7.74286686 47.80555919 22.64017604  9.21652756]


In [33]:
# tree_regressor was fitted on X_train_reg, which has no 'Fare' column (Fare is
# the regression target). required_columns includes 'Fare', so using it here
# would not match the fitted feature set and predict() would fail on a fresh run.
# Select exactly the training columns, in training order.
tree_reg_predictions = tree_regressor.predict(test_data[list(X_train_reg.columns)])
# Show only the first ten predictions.
print(f'Decision Tree Regression Predictions: {tree_reg_predictions[:10]}')

Decision Tree Regression Predictions: [ 6.4375   14.5      10.5       7.8111   15.7417    8.51834   7.791325
 30.        5.62085  18.      ]


In [39]:
# Predict survival labels with the logistic model on the test features.
classification_inputs = test_data[required_columns]
logistic_predictions = logistic_model.predict(classification_inputs)
# Show only the first ten predicted labels.
print(f'Logistic Regression Predictions: {logistic_predictions[:10]}')

Logistic Regression Predictions: [0 0 0 0 1 0 1 0 1 0]


In [41]:
# Predict survival labels with the decision-tree classifier on the test features.
tree_inputs = test_data[required_columns]
tree_class_predictions = tree_classifier.predict(tree_inputs)
# Show only the first ten predicted labels.
print(f'Decision Tree Classification Predictions: {tree_class_predictions[:10]}')

Decision Tree Classification Predictions: [0 1 1 1 1 0 0 0 1 0]
