In [3]:
#1.
#Importing libraries
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, mean_squared_error
from sklearn.model_selection import train_test_split
import pandas as pd

#Attach Google Drive
# The CSV files read further down live under /content/drive/MyDrive, so the
# drive has to be mounted inside the Colab runtime before any data is loaded.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

#2.
#Decision tree classification - Acquire Data
# Load the banknote authentication data set from the mounted Google Drive.
df = pd.read_csv('/content/drive/MyDrive/banknote_authentication.csv')

#4.
#Train and Test
# "forgery" is the label; every other column is used as a feature.
y_classification = df["forgery"]
x_classification = df.drop("forgery", axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
(
    x_train_classification,
    x_test_classification,
    y_train_classification,
    y_test_classification,
) = train_test_split(
    x_classification,
    y_classification,
    test_size=0.2,
    random_state=42,
)

#5.
#Pipeline for Classification
# Step 1 rescales every feature with MinMaxScaler; step 2 fits a decision tree
# on the rescaled features.
# random_state is fixed to the same seed as the train/test split so that
# repeated runs build the same tree and report the same metrics. Without it
# scikit-learn permutes the candidate features randomly at each split, so
# ties between equally good splits can be broken differently on every run.
classification_pipeline = Pipeline([
    ('scaling', MinMaxScaler()),
    ('classifier', DecisionTreeClassifier(random_state=42)),
])

#6.
#Execute
# Pipeline.fit returns the fitted pipeline itself; keep a handle on it and use
# that handle for prediction. Only the training split is used for fitting.
fitted_classifier = classification_pipeline.fit(
    x_train_classification,
    y_train_classification,
)


#7.
#Evaluating the Model
# Predicted labels for the held-out test rows.
y_pred_classification = fitted_classifier.predict(x_test_classification)

# Confusion matrix
def plot_cm(y_true, y_pred):
    """Print the confusion matrix for a set of predictions.

    Despite the name, no figure is drawn: the matrix returned by
    ``confusion_matrix(y_true, y_pred)`` is written to standard output and
    the function returns ``None``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, passed to ``confusion_matrix`` together
            with ``y_true``.
    """
    print(confusion_matrix(y_true, y_pred))

# Show the confusion matrix for the test-set predictions.
plot_cm(y_test_classification, y_pred_classification)

# Precision, recall, and F1 score, all computed on the held-out test split
classification_truth_and_prediction = (
    y_test_classification,
    y_pred_classification,
)
precision = precision_score(*classification_truth_and_prediction)
recall = recall_score(*classification_truth_and_prediction)
f1 = f1_score(*classification_truth_and_prediction)

# Printing precision, recall, and F1 score
for metric_label, metric_value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(metric_label, metric_value)

#1
#Acquire Data
# Load the steel industry energy data set from the mounted Google Drive.
df_regression = pd.read_csv('/content/drive/MyDrive/Steel_industry_data.csv')

#Rename fields
# Map the original CSV headers to the short snake_case names used by the
# regression pipeline. Columns that are not listed keep their original name.
regression_column_names = {
    "Usage_kWh": "usage_kwh",
    "Lagging_Current_Reactive.Power_kVarh": "lag_react_pwr_kvarh",
    "Leading_Current_Reactive_Power_kVarh": "lead_react_pwr_kvarh",
    "Lagging_Current_Power_Factor": "lag_current_pwr",
    "Leading_Current_Power_Factor": "lead_current_pwr",
    "NSM": "nsm",
    "WeekStatus": "week_status",
    "Day_of_week": "day_of_week",
    "Load_Type": "load_type",
}
df_regression = df_regression.rename(columns=regression_column_names)

#2
#Train and Test
# "usage_kwh" is the regression target; "date" is excluded from the features.
y_regression = df_regression["usage_kwh"]
x_regression = df_regression.drop(["date", "usage_kwh"], axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
(
    x_train_regression,
    x_test_regression,
    y_train_regression,
    y_test_regression,
) = train_test_split(
    x_regression,
    y_regression,
    test_size=0.2,
    random_state=42,
)

#3,4
#Building the Pipeline for Regression
# Preprocessing: the listed numeric columns are rescaled with MinMaxScaler and
# the listed categorical columns are one-hot encoded. Any feature column that
# appears in neither list is dropped, because ColumnTransformer defaults to
# remainder='drop'.
# random_state is fixed to the same seed as the train/test split so that
# repeated runs build the same tree and report the same RMSE. Without it
# scikit-learn permutes the candidate features randomly at each split.
regression_pipeline = Pipeline([
    ('preprocessing', ColumnTransformer([
        ('numeric', MinMaxScaler(), ['lag_react_pwr_kvarh', 'lead_react_pwr_kvarh', 'lag_current_pwr', 'lead_current_pwr', 'nsm']),
        ('categorical', OneHotEncoder(), ['week_status', 'day_of_week', 'load_type']),
    ])),
    ('regressor', DecisionTreeRegressor(random_state=42)),
])

#5.
#Executing
# Fit the preprocessing steps and the tree on the training split only.
regression_pipeline.fit(x_train_regression, y_train_regression)

#6.
#Evaluate
# Predictions for both splits: the train RMSE shows how closely the tree fits
# the data it was trained on, the test RMSE how well it generalizes.
y_pred_train_regression = regression_pipeline.predict(x_train_regression)
y_pred_regression = regression_pipeline.predict(x_test_regression)

# RMSE is taken as the square root of the MSE instead of calling
# mean_squared_error(..., squared=False): that keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, where passing it raises a TypeError.
# The square root works on every scikit-learn version.
rmse_train = mean_squared_error(y_train_regression, y_pred_train_regression) ** 0.5
rmse_test = mean_squared_error(y_test_regression, y_pred_regression) ** 0.5

print("RMSE for Train:", rmse_train)
print("RMSE for Test:", rmse_test)

#Classification Conclusion:
#In the recorded run (see the output below), the Decision Tree model achieved a precision of 0.9919, a recall
#of 0.9606, and an F1 score of 0.976 on the test set. The confusion matrix shows 147 true negatives, 1 false
#positive, 5 false negatives, and 122 true positives. These metrics indicate that the model performs well
#in correctly classifying both positive and negative instances, and that it distinguishes effectively
#between genuine and forged banknotes.

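#Regression Conclusion:
#In the recorded run (see the output below), the Decision Tree Regression model achieved an RMSE of about
#1.79e-16 (effectively zero) for the train data and about 1.184 for the test data. RMSE is the square root of
#the mean squared deviation of the predicted values from the actual values, expressed in the unit of the
#target (kWh). The near-zero train error shows that the tree reproduces the training data almost exactly,
#so the test RMSE is the meaningful measure of how well the model predicts the kWh usage for the steel
#industry. There is still room for improvement, given the gap between the train and test RMSE.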





Mounted at /content/drive
[[147   1]
 [  5 122]]
Precision: 0.991869918699187
Recall: 0.9606299212598425
F1 Score: 0.976
RMSE for Train: 1.7905344990687277e-16
RMSE for Test: 1.1841848823627323
