In [1]:
# Attach Google Drive to this Colab runtime; the dataset is read from
# /content/drive/MyDrive further down in the notebook.
from google.colab import drive

drive.mount(
    '/content/drive',
    force_remount=True,
)

Mounted at /content/drive


In [2]:
# 1. Clona il repo localmente
!pip install influenciae

# 2. Modifica i requisiti nel setup.py (disabilita la restrizione su tensorflow)
!#sed -i 's/"tensorflow>=2.7.0,<2.10.0"/"tensorflow>=2.12.0"/' setup.py

# 3. Installa la versione modificata
#!pip install .


Collecting influenciae
  Downloading Influenciae-0.3.0-py3-none-any.whl.metadata (17 kB)
INFO: pip is looking at multiple versions of influenciae to determine which version is compatible with other requirements. This could take a while.
  Downloading Influenciae-0.2.0-py3-none-any.whl.metadata (16 kB)
  Downloading Influenciae-0.1.0-py3-none-any.whl.metadata (12 kB)
Downloading Influenciae-0.1.0-py3-none-any.whl (72 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: influenciae
Successfully installed influenciae-0.1.0


In [4]:
import tensorflow as tf
# Make tf.function-decorated code run eagerly instead of as traced graphs.
# NOTE(review): presumably required by the influence computations later in the
# notebook — confirm; eager execution is typically slower than graph mode.
tf.config.run_functions_eagerly(True)
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras import Sequential
from keras import layers

from deel.influenciae.common import InfluenceModel, ExactIHVP
from deel.influenciae.influence import FirstOrderInfluenceCalculator
from deel.influenciae.utils import ORDER
from deel.influenciae.trac_in import TracIn
from keras.losses import BinaryCrossentropy
import warnings
# Patch temporanea per compatibilità con numpy >=1.24
if not hasattr(np, 'float'):
    np.float = float
warnings.filterwarnings('ignore')


In [5]:
# Location of the Titanic CSV on the mounted Drive.
train_file_path = '/content/drive/MyDrive/influence-analysis/dataset/titanic.csv'

# Raw passenger table; later cells work on copies so this frame stays untouched.
original_df = pd.read_csv(filepath_or_buffer=train_file_path)
original_df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [6]:
# Build the working frame from the raw table: rename the passenger key to `ID`
# and drop the Name / Ticket / Cabin columns. Both calls return new frames, so
# `original_df` is left unchanged.
df = (
    original_df
    .rename(columns={'PassengerId': 'ID'})
    .drop(columns=["Name", "Ticket", "Cabin"])
)
df

Unnamed: 0,ID,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,male,22.0,1,0,7.2500,S
1,2,1,1,female,38.0,1,0,71.2833,C
2,3,1,3,female,26.0,0,0,7.9250,S
3,4,1,1,female,35.0,1,0,53.1000,S
4,5,0,3,male,35.0,0,0,8.0500,S
...,...,...,...,...,...,...,...,...,...
886,887,0,2,male,27.0,0,0,13.0000,S
887,888,1,1,female,19.0,0,0,30.0000,S
888,889,0,3,female,,1,2,23.4500,S
889,890,1,1,male,26.0,0,0,30.0000,C


In [7]:
# Count missing values per column to see which columns need cleaning.
print(df.isna().sum())

ID            0
Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Embarked      2
dtype: int64


In [8]:
# Keep only the rows whose `Embarked` value is present.
df = df.loc[df['Embarked'].notna()]
df

Unnamed: 0,ID,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,male,22.0,1,0,7.2500,S
1,2,1,1,female,38.0,1,0,71.2833,C
2,3,1,3,female,26.0,0,0,7.9250,S
3,4,1,1,female,35.0,1,0,53.1000,S
4,5,0,3,male,35.0,0,0,8.0500,S
...,...,...,...,...,...,...,...,...,...
886,887,0,2,male,27.0,0,0,13.0000,S
887,888,1,1,female,19.0,0,0,30.0000,S
888,889,0,3,female,,1,2,23.4500,S
889,890,1,1,male,26.0,0,0,30.0000,C


In [9]:
# Record which rows had no age in a 0/1 `MissAge` indicator, then replace the
# missing ages with 0. `assign` returns a new frame, so the previous one is not
# modified in place.
df = df.assign(
    MissAge=df['Age'].isna().astype(int),
    Age=df['Age'].fillna(0),
)
df

Unnamed: 0,ID,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,MissAge
0,1,0,3,male,22.0,1,0,7.2500,S,0
1,2,1,1,female,38.0,1,0,71.2833,C,0
2,3,1,3,female,26.0,0,0,7.9250,S,0
3,4,1,1,female,35.0,1,0,53.1000,S,0
4,5,0,3,male,35.0,0,0,8.0500,S,0
...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,male,27.0,0,0,13.0000,S,0
887,888,1,1,female,19.0,0,0,30.0000,S,0
888,889,0,3,female,0.0,1,2,23.4500,S,1
889,890,1,1,male,26.0,0,0,30.0000,C,0


In [10]:
# Re-check the per-column missing-value counts after the cleaning cells above.
print(df.isna().sum())

ID          0
Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
MissAge     0
dtype: int64


In [11]:
# Replace the string categories with integer codes. Each encoder is kept in its
# own variable so the mapping can be looked up or inverted afterwards.
sex_trans = LabelEncoder().fit(df['Sex'])
df['Sex'] = sex_trans.transform(df['Sex'])

Emb_trans = LabelEncoder().fit(df['Embarked'])
df['Embarked'] = Emb_trans.transform(df['Embarked'])

df

Unnamed: 0,ID,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,MissAge
0,1,0,3,1,22.0,1,0,7.2500,2,0
1,2,1,1,0,38.0,1,0,71.2833,0,0
2,3,1,3,0,26.0,0,0,7.9250,2,0
3,4,1,1,0,35.0,1,0,53.1000,2,0
4,5,0,3,1,35.0,0,0,8.0500,2,0
...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,1,27.0,0,0,13.0000,2,0
887,888,1,1,0,19.0,0,0,30.0000,2,0
888,889,0,3,0,0.0,1,2,23.4500,2,1
889,890,1,1,1,26.0,0,0,30.0000,0,0


In [12]:
# Standardise the continuous columns (the fitted scaler is kept in `Normalize`).
# NOTE(review): the scaler is fitted on every row, before the train/test split
# done in a later cell, so test rows contribute to the statistics — consider
# fitting on the training rows only.
Normalize_cols = ["Age", "Fare"]
Normalize = StandardScaler().fit(df[Normalize_cols])
df[Normalize_cols] = Normalize.transform(df[Normalize_cols])
df

Unnamed: 0,ID,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,MissAge
0,1,0,3,1,-0.099150,1,0,-0.500240,2,0
1,2,1,1,0,0.812389,1,0,0.788947,0,0
2,3,1,3,0,0.128735,0,0,-0.486650,2,0
3,4,1,1,0,0.641476,1,0,0.422861,2,0
4,5,0,3,1,0.641476,0,0,-0.484133,2,0
...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,1,0.185706,0,0,-0.384475,2,0
887,888,1,1,0,-0.270063,0,0,-0.042213,2,0
888,889,0,3,0,-1.352516,1,2,-0.174084,2,1
889,890,1,1,1,0.128735,0,0,-0.042213,0,0


In [13]:
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [14]:
# Separate the label column from the feature table.
y = df['Survived']
X = df.drop(columns=['Survived'])

# Passenger IDs as a float32 column vector, scaled down by 1e7.
# NOTE(review): presumably scaled so the identifier can travel with each row
# while having little effect on the network — confirm.
IDs = X['ID'].to_numpy().reshape(-1, 1).astype(np.float32)
IDs = IDs / 1e7

# Feature matrix: every column except the raw `ID`, with the scaled ID appended
# as the last column (so, despite the name, the array does carry the ID).
X_withNotID = np.concatenate(
    [X.drop(columns=['ID']).to_numpy().astype(np.float32), IDs],
    axis=1,
)

# Two-column one-hot encoding of the binary label.
y_oneHot = to_categorical(y.values, num_classes=2)

X_withNotID.shape

(889, 9)

In [15]:
# Shape of the one-hot label matrix: (rows, 2 classes).
y_oneHot.shape

(889, 2)

In [16]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# the same across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_withNotID,
    y_oneHot,
    test_size=0.2,
    random_state=42,
)

# One shape per line, in the order train/test features then train/test labels.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep='\n')


(711, 9)
(178, 9)
(711, 2)
(178, 2)


In [17]:
# Wrap each (features, labels) pair in an unbatched tf.data.Dataset; batching
# is applied where the datasets are consumed.
train_ds, test_ds = (
    tf.data.Dataset.from_tensor_slices(arrays)
    for arrays in ((X_train, y_train), (X_test, y_test))
)

In [18]:
# Number of examples in the training and test datasets, one per line.
print(len(train_ds), len(test_ds), sep='\n')

711
178


In [19]:
from keras import Sequential
from keras import layers

In [20]:
# Small fully connected classifier over the 9 input features. The two output
# units use sigmoid activations, so the network emits probabilities, not logits.
model = Sequential([
        layers.Dense(32, activation='relu', input_shape=(9,)),
        layers.Dense(16, activation='relu'),
        layers.Dense(2, activation='sigmoid')
    ])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

epochs = 10

# Per-sample (unreduced) loss handed to the influence wrappers.
# from_logits is False because the model already applies a sigmoid; this also
# matches the 'binary_crossentropy' loss the model is trained with. Using
# from_logits=True here would evaluate a different loss than the one optimised.
unreduced_loss_fn = BinaryCrossentropy(from_logits=False, reduction=tf.keras.losses.Reduction.NONE)

# One InfluenceModel per checkpoint: the initial weights, then the weights
# after each epoch. Every checkpoint wraps its OWN clone of the network with a
# copy of the weights at that moment. Wrapping the live `model` instead would
# make all entries share one object, so every "checkpoint" would end up holding
# the final weights.
initial_model = tf.keras.models.clone_model(model)
initial_model.set_weights(model.get_weights())
model_list = [InfluenceModel(initial_model, start_layer=-1, loss_function=unreduced_loss_fn)]

for epoch_idx in range(epochs):
    # Train for exactly one epoch so a snapshot can be taken between epochs.
    model.fit(train_ds.batch(32), epochs=1, validation_data=test_ds.batch(32), verbose=2)

    # Freeze the current weights in an independent copy of the network.
    updated_model = tf.keras.models.clone_model(model)
    updated_model.set_weights(model.get_weights())
    model_list.append(InfluenceModel(updated_model, start_layer=-1, loss_function=unreduced_loss_fn))

# Final test-set [loss, accuracy]; left as the last expression so it is displayed.
model.evaluate(test_ds.batch(32), verbose=2)

23/23 - 2s - 79ms/step - accuracy: 0.4768 - loss: 0.7246 - val_accuracy: 0.5955 - val_loss: 0.6652
23/23 - 2s - 68ms/step - accuracy: 0.6160 - loss: 0.6507 - val_accuracy: 0.6124 - val_loss: 0.6403
23/23 - 1s - 55ms/step - accuracy: 0.6188 - loss: 0.6325 - val_accuracy: 0.6124 - val_loss: 0.6226
23/23 - 2s - 91ms/step - accuracy: 0.6371 - loss: 0.6162 - val_accuracy: 0.6517 - val_loss: 0.6061
23/23 - 2s - 106ms/step - accuracy: 0.6624 - loss: 0.6008 - val_accuracy: 0.6966 - val_loss: 0.5898
23/23 - 3s - 134ms/step - accuracy: 0.6835 - loss: 0.5856 - val_accuracy: 0.7191 - val_loss: 0.5727
23/23 - 5s - 196ms/step - accuracy: 0.7018 - loss: 0.5690 - val_accuracy: 0.7247 - val_loss: 0.5566
23/23 - 3s - 151ms/step - accuracy: 0.7117 - loss: 0.5529 - val_accuracy: 0.7472 - val_loss: 0.5413
23/23 - 1s - 65ms/step - accuracy: 0.7201 - loss: 0.5381 - val_accuracy: 0.7753 - val_loss: 0.5278
23/23 - 1s - 55ms/step - accuracy: 0.7342 - loss: 0.5243 - val_accuracy: 0.7978 - val_loss: 0.5159
6/6 - 

[0.5159361362457275, 0.7977527976036072]