<a href="https://colab.research.google.com/github/ShinjiNeo/Test_ML/blob/main/Template_Classif_Dic_PIPE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PIPE della Classificazione Dicotomica/Binaria

- importo le Librerie

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.pipeline import make_pipeline,Pipeline
from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import KBinsDiscretizer,OneHotEncoder,StandardScaler,FunctionTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

import warnings
warnings.filterwarnings('ignore')

- Importo il dataset

In [2]:
path ='https://frenzy86.s3.eu-west-2.amazonaws.com/fav/tecno/titanic.csv'
df = pd.read_csv(path, sep='\t')
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
151,152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22.0,1,0,113776,66.6000,C2,S
152,153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.0500,,S
153,154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5000,,S
154,155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S


- Selezione le Features da Rimuovere

In [3]:
features_to_remove = ['Name', 'PassengerId', 'Ticket', 'Cabin']
df_clean = df.drop(features_to_remove, axis=1)

- Divido tra Target e Features

In [6]:
TARGET='Survived'
X = df_clean.drop(['Survived'], axis=1) # matrice Features
y = df_clean[TARGET]

In [7]:
print(X.shape)
print(y.shape)

(156, 7)
(156,)


In [8]:
numerical_features = [x for x, dtype in zip(X.columns, X.dtypes) if dtype.kind in ['i','f'] ]
categorical_features = [x for x, dtype in zip(X.columns, X.dtypes) if dtype.kind not in ['i','f']]

print('Numerical : ' + str(numerical_features))
print('Categorical : ' + str(categorical_features))

Numerical : ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare']
Categorical : ['Sex', 'Embarked']


## Preprocessing

- Formalizzo le cose da fare nel Preprocessing

In [9]:
Preprocessor = make_column_transformer(
                                        (make_pipeline(
                                        SimpleImputer(strategy = 'median'),
                                        KBinsDiscretizer(n_bins=3)), numerical_features),

                                        (make_pipeline(
                                        #FunctionTransformer(lambda x: np.where(x == 'n.d.', np.nan, x)),
                                        SimpleImputer(strategy = 'constant', fill_value = 'missing'),
                                        OneHotEncoder(categories = 'auto', handle_unknown = 'ignore',drop='first')), categorical_features)
                                        )

- Standardizzazione

In [10]:
scaler = StandardScaler()

- Classificazione

In [17]:
classifier = RandomForestClassifier(bootstrap=True,
                                    max_depth=80,
                                    max_features=2,
                                    min_samples_leaf=3,
                                    min_samples_split=8,
                                    n_estimators=200
                                    )

## PIPE LINE vera e propria

In [18]:
model_pipe = Pipeline([
                       ('Preprocessing features', Preprocessor),
                       ('Scaling and standadize data', scaler),
                       ('Classifier', classifier)
                      ])
model_pipe

- Cross Validation

In [19]:
cross_val_scores = cross_val_score(model_pipe, X, y, cv=5)
print(cross_val_scores)

# Calcola la media e la deviazione standard dei punteggi
mean_score = np.mean(cross_val_scores)
std_score = np.std(cross_val_scores)

print(f"Mean cross-validation score:{mean_score:.2f}")
print(f"Standard deviation of cross-validation scores:{std_score:.2f}")



[0.8125     0.77419355 0.77419355 0.70967742 0.70967742]
Mean cross-validation score:0.76
Standard deviation of cross-validation scores:0.04


- Applico la Pipe al mio dataset

In [20]:
model_pipe.fit(X, y)

In [21]:
y_pred_tot = model_pipe.predict(X)

In [22]:
from sklearn.metrics import classification_report

classification_report(y,y_pred_tot )

classification_report_result = classification_report(y, y_pred_tot)
print(classification_report_result)

              precision    recall  f1-score   support

           0       0.82      0.98      0.89       102
           1       0.94      0.59      0.73        54

    accuracy                           0.85       156
   macro avg       0.88      0.79      0.81       156
weighted avg       0.86      0.85      0.84       156



## Predizione

- su singolo caso

In [24]:
Age= 34
Embarked = 'Q'
Fare =7.8
Parch = 0
Pclass = 3
Sex = 'male'
SibSp = 0

data = {
        "Pclass": [Pclass],
        "Sex": [Sex],
        "Age": [Age],
        "SibSp": [SibSp],
        "Parch": [Parch],
        "Fare": [Fare],
        "Embarked": [Embarked]
        }

input_df = pd.DataFrame(data)
res = model_pipe.predict(input_df).astype(int)[0]
print(res)

classes = {0:'died',
           1:'survived',
           }

y_pred = classes[res]
y_pred



0


'died'

## Esportazione del modello

In [25]:
import joblib

joblib.dump(model_pipe,'titanic_pipe.pkl')

['titanic_pipe.pkl']

In [26]:
import joblib

model_pipe = joblib.load('titanic_pipe.pkl')
print('Model loaded successfully')

Model loaded successfully


- testo il modello su dei dati

In [27]:
Age= 34
Embarked = 'Q'
Fare =7.8
Parch = 0
Pclass = 3
Sex = 'female'
SibSp = 0

data = {
        "Pclass": [Pclass],
        "Sex": [Sex],
        "Age": [Age],
        "SibSp": [SibSp],
        "Parch": [Parch],
        "Fare": [Fare],
        "Embarked": [Embarked]
        }
classes = {0:'died',
           1:'survived',
           }

res = model_pipe.predict(input_df).astype(int)[0]
print(res)

y_pred = classes[res]
y_pred

0


'died'