<a href="https://colab.research.google.com/github/DanielDekhtyar/AI-Accelerator/blob/main/Module%207%20-%20Machine%20Learning/titanic_linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

In [2]:
# קריאת הנתונים
df = pd.read_csv("titanic.csv")

In [3]:
df.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.25
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.925
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.05


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 887 entries, 0 to 886
Data columns (total 8 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Survived                 887 non-null    int64  
 1   Pclass                   887 non-null    int64  
 2   Name                     887 non-null    object 
 3   Sex                      887 non-null    object 
 4   Age                      887 non-null    float64
 5   Siblings/Spouses Aboard  887 non-null    int64  
 6   Parents/Children Aboard  887 non-null    int64  
 7   Fare                     887 non-null    float64
dtypes: float64(2), int64(4), object(2)
memory usage: 55.6+ KB


In [5]:
#EDA
# יש להפוך את עמודת מין לעמודה בינארית
df['Sex'] = df['Sex'] == 'male'
# יש למחוק  שורות שחסר בהן מידע בעמודות:['Pclass', 'Sex', 'Age', 'Fare', 'Survived']
df = df[['Pclass', 'Sex', 'Age', 'Fare', 'Survived']].dropna()
#אלה גם העמודות היחידות שהמודל יקח בחשבון
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 887 entries, 0 to 886
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Pclass    887 non-null    int64  
 1   Sex       887 non-null    bool   
 2   Age       887 non-null    float64
 3   Fare      887 non-null    float64
 4   Survived  887 non-null    int64  
dtypes: bool(1), float64(2), int64(2)
memory usage: 28.7 KB


In [6]:
# הפרדת פיצ'רים ותוויות
# על הפיצ'רים לכלול רק את: ['Pclass', 'Sex', 'Age', 'Fare']
X = df[['Pclass', 'Sex', 'Age', 'Fare']].values
y = df['Survived'].values

In [7]:
# חלוקה לסט אימון וסט בדיקה
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

In [8]:
# סטנדרטיזציה של הפיצ'רים
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# המרת הנתונים לטנסורים
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [10]:
# הגדרת המודל
class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(4, 16)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(16, 8)
        self.tanh = nn.Tanh()
        self.fc3 = nn.Linear(8, 1)
        self.dropout = nn.Dropout(p = 0.3)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.tanh(x)
        x = self.fc3(x)
        return x

In [11]:
# יצירת אינסטנס של המודל
model = Classifier()

# הגדרת פונקציית הפסד ואופטימיזר
criterion = nn.BCEWithLogitsLoss()
optimaizer = optim.Adam(model.parameters(), lr=0.001)

In [12]:
# אימון המודל
    # הדפסת איבוד כל 100 אפוקים
    # הדפיסו גם את את הדיוק של סט האימון על הדרך

epochs = 1000

for epoch in range(epochs):
    model.train()
    optimaizer.zero_grad() # איתחול של כל המשתנים באוטימייזר

    # prediction
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # correcting error
    loss.backward()
    optimaizer.step()

    if epoch%100 == 0:
        predicitions = (torch.sigmoid(outputs) > 0.5).float()
        accuracy = (predicitions == y_train_tensor).float().mean()
        print(f"Epoch : {epoch}/{epochs}, Loss : {loss.item() :.2f}, Accuracy : {accuracy.item()*100 :.2f}%")

Epoch : 0/1000, Loss : 0.68, Accuracy : 61.78%
Epoch : 100/1000, Loss : 0.57, Accuracy : 73.77%
Epoch : 200/1000, Loss : 0.49, Accuracy : 77.57%
Epoch : 300/1000, Loss : 0.46, Accuracy : 78.98%
Epoch : 400/1000, Loss : 0.45, Accuracy : 78.70%
Epoch : 500/1000, Loss : 0.43, Accuracy : 81.10%
Epoch : 600/1000, Loss : 0.44, Accuracy : 79.69%
Epoch : 700/1000, Loss : 0.42, Accuracy : 82.09%
Epoch : 800/1000, Loss : 0.43, Accuracy : 80.11%
Epoch : 900/1000, Loss : 0.42, Accuracy : 79.83%


In [13]:
# הערכת המודל
model.eval()
with torch.no_grad():
    predicitions = (torch.sigmoid(model(X_test_tensor)) > 0.5).float()
    accuracy = (predicitions == y_test_tensor).float().mean().item()
    print(f"Final accuracy : {accuracy*100 :.2f}")

Final accuracy : 82.58
