In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from imblearn.over_sampling import SMOTE

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [2]:
PATH = "/content/drive/MyDrive/Progetto Manutenzione"

In [3]:
# Caricare e concatenare i file CSV (Case001.csv-Case177.csv)
data_frames = []
for i in range(1, 178):
    file_path = (PATH + f'/dataset/train/data/Case{i:03d}.csv')
    df = pd.read_csv(file_path)
    df['Case'] = i
    data_frames.append(df)

data = pd.concat(data_frames, ignore_index=True)
data

Unnamed: 0,TIME,P1,P2,P3,P4,P5,P6,P7,Case
0,0.000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,1
1,0.001,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,1
2,0.002,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,1
3,0.003,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,1
4,0.004,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,1
...,...,...,...,...,...,...,...,...,...
212572,1.196,1.948734,1.912482,1.904362,1.835654,1.924899,1.857220,1.920020,177
212573,1.197,1.948823,1.899824,1.884360,1.825497,1.927513,1.846068,1.919081,177
212574,1.198,1.957784,1.911383,1.893740,1.859805,1.940953,1.861668,1.950562,177
212575,1.199,1.970451,1.950009,1.945417,1.913911,1.953648,1.835381,1.983321,177


In [4]:
column_names = ['Case', 'Spacecraft', 'Condition', 'SV1', 'SV2', 'SV3', 'SV4', 'BP1', 'BP2', 'BP3', 'BP4', 'BP5', 'BP6', 'BP7', 'BV1']

labels = pd.read_csv(PATH + '/dataset/train/labels.csv', names=column_names)

# Sostituisco "Normal" con 0, "Anomaly" e "Fault" con 1
labels['Condition'] = labels['Condition'].map({'Normal': 0, 'Anomaly': 1, 'Fault': 1})

# Sostituisco "No"/"Yes" con 0/1 nelle colonne da "BP1" a "BV1"
for col in ['BP1', 'BP2', 'BP3', 'BP4', 'BP5', 'BP6', 'BP7', 'BV1']:
    labels[col] = labels[col].map({'No': 0, 'Yes': 1})

labels

Unnamed: 0,Case,Spacecraft,Condition,SV1,SV2,SV3,SV4,BP1,BP2,BP3,BP4,BP5,BP6,BP7,BV1
0,1,1,0,100,100,100,100,0,0,0,0,0,0,0,0
1,2,1,0,100,100,100,100,0,0,0,0,0,0,0,0
2,3,1,0,100,100,100,100,0,0,0,0,0,0,0,0
3,4,1,0,100,100,100,100,0,0,0,0,0,0,0,0
4,5,1,0,100,100,100,100,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,173,3,1,100,100,100,100,0,0,0,1,0,0,0,0
173,174,3,1,100,100,100,100,0,0,0,0,1,0,0,0
174,175,3,1,100,100,100,100,0,0,0,0,0,1,0,0
175,176,3,1,100,100,100,100,0,0,0,0,0,0,1,0


In [5]:
import pandas as pd

merged_data = pd.merge(data, labels, on="Case", how="left")

result_data = merged_data[["Case", "Condition", "P1", "P2", "P3", "P4", "P5", "P6", "P7"]]

In [6]:
result_data

Unnamed: 0,Case,Condition,P1,P2,P3,P4,P5,P6,P7
0,1,0,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000
1,1,0,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000
2,1,0,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000
3,1,0,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000
4,1,0,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000
...,...,...,...,...,...,...,...,...,...
212572,177,1,1.948734,1.912482,1.904362,1.835654,1.924899,1.857220,1.920020
212573,177,1,1.948823,1.899824,1.884360,1.825497,1.927513,1.846068,1.919081
212574,177,1,1.957784,1.911383,1.893740,1.859805,1.940953,1.861668,1.950562
212575,177,1,1.970451,1.950009,1.945417,1.913911,1.953648,1.835381,1.983321


In [7]:
result_data.drop(["Case"], axis=1)

Unnamed: 0,Condition,P1,P2,P3,P4,P5,P6,P7
0,0,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000
1,0,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000
2,0,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000
3,0,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000
4,0,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000,2.000000
...,...,...,...,...,...,...,...,...
212572,1,1.948734,1.912482,1.904362,1.835654,1.924899,1.857220,1.920020
212573,1,1.948823,1.899824,1.884360,1.825497,1.927513,1.846068,1.919081
212574,1,1.957784,1.911383,1.893740,1.859805,1.940953,1.861668,1.950562
212575,1,1.970451,1.950009,1.945417,1.913911,1.953648,1.835381,1.983321


In [8]:
features = ['P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7']
X = result_data[features]

y = result_data["Condition"]

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "KNN": KNeighborsClassifier(),
}

result = []

for model_name, model in models.items():
  print(f"Training {model_name}...")
  model.fit(X_train, y_train)

  print(f"Prediction on {model_name}...")
  y_pred = model.predict(X_test)

  accuracy = round(accuracy_score(y_test, y_pred), 2)
  precision = round(precision_score(y_test, y_pred), 2)
  recall = round(recall_score(y_test, y_pred), 2)
  f1 = round(f1_score(y_test, y_pred), 2)
  cm = confusion_matrix(y_test, y_pred)

  print(f"Adding result about {model_name}...")
  model_result = {}

  model_result["Model_name"] = model_name
  model_result["Precision"] = precision
  model_result["Recall"] = recall
  model_result["F1_score"] = f1
  model_result["Confusion_matrix"] = cm

  result.append(model_result)

  print(f"Result about {model_name} added. Next model.")

print("DONE.")

Training Logistic Regression...
Prediction on Logistic Regression...
Adding result about Logistic Regression...
Result about Logistic Regression added. Next model.
Training Decision Tree...


  _warn_prf(average, modifier, msg_start, len(result))


Prediction on Decision Tree...
Adding result about Decision Tree...
Result about Decision Tree added. Next model.
Training Random Forest...
Prediction on Random Forest...
Adding result about Random Forest...
Result about Random Forest added. Next model.
Training KNN...
Prediction on KNN...
Adding result about KNN...
Result about KNN added. Next model.
DONE.


In [18]:
result

[{'Model_name': 'Logistic Regression',
  'Precision': 0.0,
  'Recall': 0.0,
  'F1_score': 0.0,
  'Confusion_matrix': array([[25127,     0],
         [17389,     0]])},
 {'Model_name': 'Decision Tree',
  'Precision': 0.85,
  'Recall': 0.83,
  'F1_score': 0.84,
  'Confusion_matrix': array([[22507,  2620],
         [ 2872, 14517]])},
 {'Model_name': 'Random Forest',
  'Precision': 0.97,
  'Recall': 0.87,
  'F1_score': 0.91,
  'Confusion_matrix': array([[24668,   459],
         [ 2341, 15048]])},
 {'Model_name': 'KNN',
  'Precision': 0.77,
  'Recall': 0.64,
  'F1_score': 0.7,
  'Confusion_matrix': array([[21850,  3277],
         [ 6344, 11045]])}]

In [21]:
best_model = models["Random Forest"]

columns = ['P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7']
predictions_result = []

for i in range(178, 223):
    file_path = (PATH + f'/dataset/test/data/Case{i:03d}.csv')
    df = pd.read_csv(file_path)
    df['Case'] = i

    prediction = best_model.predict(df[columns])
    predictions_result.append({"Case" : i, "Prediction" : prediction})

print(predictions_result)

[{'Case': 178, 'Prediction': array([0, 0, 0, ..., 1, 1, 1])}, {'Case': 179, 'Prediction': array([0, 0, 0, ..., 1, 1, 1])}, {'Case': 180, 'Prediction': array([0, 0, 0, ..., 0, 0, 0])}, {'Case': 181, 'Prediction': array([0, 0, 0, ..., 0, 0, 1])}, {'Case': 182, 'Prediction': array([0, 0, 0, ..., 0, 0, 0])}, {'Case': 183, 'Prediction': array([0, 0, 0, ..., 0, 0, 0])}, {'Case': 184, 'Prediction': array([1, 1, 1, ..., 1, 1, 1])}, {'Case': 185, 'Prediction': array([0, 0, 0, ..., 0, 0, 0])}, {'Case': 186, 'Prediction': array([0, 0, 0, ..., 1, 1, 1])}, {'Case': 187, 'Prediction': array([0, 0, 0, ..., 0, 0, 0])}, {'Case': 188, 'Prediction': array([0, 0, 0, ..., 1, 1, 1])}, {'Case': 189, 'Prediction': array([0, 0, 0, ..., 0, 0, 0])}, {'Case': 190, 'Prediction': array([0, 0, 0, ..., 1, 1, 0])}, {'Case': 191, 'Prediction': array([0, 0, 0, ..., 0, 0, 0])}, {'Case': 192, 'Prediction': array([1, 1, 1, ..., 0, 1, 1])}, {'Case': 193, 'Prediction': array([0, 0, 0, ..., 1, 1, 1])}, {'Case': 194, 'Predicti