Artifacial Intelligence LAB Project F2021266028 (Rida Abrar)

**1-Logistic Regression:**

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error, confusion_matrix, classification_report
import numpy as np

# Load the dataset
df = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Thyroid_Diff.csv')

# Preprocessing: Convert categorical variables to dummy variables
df = pd.get_dummies(df, drop_first=True)

# Define features and target variable
X = df.drop('Recurred_Yes', axis=1)  # Adjust based on your column encoding
y = df['Recurred_Yes']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)
y_pred_prob = model.predict_proba(X_test)[:, 1]  # Get the probabilities for the positive class

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_prob))  # Use probabilities for RMSE
mae = mean_absolute_error(y_test, y_pred_prob)  # Use probabilities for MAE

# Print evaluation metrics
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print(f'RMSE: {rmse}')
print(f'MAE: {mae}')

# Detailed classification report
print('\nClassification Report:\n', classification_report(y_test, y_pred))

# Confusion matrix
print('\nConfusion Matrix:\n', confusion_matrix(y_test, y_pred))



Accuracy: 0.987012987012987
Precision: 1.0
Recall: 0.9473684210526315
F1 Score: 0.972972972972973
RMSE: 0.15685764998187254
MAE: 0.06428868667184154

Classification Report:
               precision    recall  f1-score   support

       False       0.98      1.00      0.99        58
        True       1.00      0.95      0.97        19

    accuracy                           0.99        77
   macro avg       0.99      0.97      0.98        77
weighted avg       0.99      0.99      0.99        77


Confusion Matrix:
 [[58  0]
 [ 1 18]]


**2-Multinomial Logistic Regression:**

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Thyroid_Diff.csv'
df = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Thyroid_Diff.csv')
# Select features and target variable
X = df.drop(columns='Response')
y = df['Response']

# Preprocess the data
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

# Create a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

# Create a pipeline with preprocessing and logistic regression
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=500))
])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Detailed classification report
print('\nClassification Report:\n', classification_report(y_test, y_pred))

# Confusion matrix
print('\nConfusion Matrix:\n', confusion_matrix(y_test, y_pred))

Accuracy: 0.7662337662337663
Classification Report:
                         precision    recall  f1-score   support

Biochemical Incomplete       0.00      0.00      0.00         3
             Excellent       0.83      0.92      0.87        48
         Indeterminate       0.20      0.10      0.13        10
 Structural Incomplete       0.82      0.88      0.85        16

              accuracy                           0.77        77
             macro avg       0.46      0.47      0.46        77
          weighted avg       0.71      0.77      0.74        77


Classification Report:
                         precision    recall  f1-score   support

Biochemical Incomplete       0.00      0.00      0.00         3
             Excellent       0.83      0.92      0.87        48
         Indeterminate       0.20      0.10      0.13        10
 Structural Incomplete       0.82      0.88      0.85        16

              accuracy                           0.77        77
             macro av

**3-Random Forest**

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, mean_squared_error, mean_absolute_error
import numpy as np

# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Thyroid_Diff.csv'
df = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Thyroid_Diff.csv')

# Select features and target variable
X = df.drop(columns='Response')
y = df['Response']

# Encode the target variable
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Preprocess the data
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

# Create a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

# Create a pipeline with preprocessing and random forest classifier
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_report_result = classification_report(y_test, y_pred, output_dict=True)
precision = classification_report_result['weighted avg']['precision']
recall = classification_report_result['weighted avg']['recall']
f1_score = classification_report_result['weighted avg']['f1-score']
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1_score)
print("RMSE:", rmse)
print("MAE:", mae)
print("\nFull Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.7662337662337663
Precision: 0.7398738032465209
Recall: 0.7662337662337663
F1-Score: 0.7494271560545492
RMSE: 0.6644986392449886
MAE: 0.2857142857142857

Full Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.84      0.85      0.85        48
           2       0.33      0.20      0.25        10
           3       0.84      1.00      0.91        16

    accuracy                           0.77        77
   macro avg       0.50      0.51      0.50        77
weighted avg       0.74      0.77      0.75        77



**4-Gradient Boosting Machines(GBM)**

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score, mean_squared_error, mean_absolute_error
import numpy as np

# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Thyroid_Diff.csv'
df = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Thyroid_Diff.csv')

# Helper function to preprocess, train, and evaluate the model
def train_and_evaluate(df, target):
    # Select features and target variable
    X = df.drop(columns=[target])
    y = df[target]

    # Encode the target variable
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # Preprocess the data
    numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
    categorical_features = X.select_dtypes(include=['object']).columns

    # Create a column transformer for preprocessing
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_features),
            ('cat', OneHotEncoder(), categorical_features)
        ])

    # Create a pipeline with preprocessing and gradient boosting classifier
    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', GradientBoostingClassifier(random_state=42))
    ])

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    # Train the model
    model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    classification_report_result = classification_report(y_test, y_pred, output_dict=True)
    precision = classification_report_result['weighted avg']['precision']
    recall = classification_report_result['weighted avg']['recall']
    f1_score = classification_report_result['weighted avg']['f1-score']
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)

    # Print evaluation results
    print(f"Results for {target}:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-Score:", f1_score)
    print("RMSE:", rmse)
    print("MAE:", mae)
    print("\nFull Classification Report:\n", classification_report(y_test, y_pred))
    print("\n" + "-"*80 + "\n")

# Train and evaluate for 'Response'
train_and_evaluate(df, 'Response')

# Train and evaluate for 'Recurred'
train_and_evaluate(df, 'Recurred')



Results for Response:
Accuracy: 0.8181818181818182
Precision: 0.7756132756132756
Recall: 0.8181818181818182
F1-Score: 0.7836407290188803
RMSE: 0.5345224838248488
MAE: 0.2077922077922078

Full Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       0.83      0.94      0.88        48
           2       0.25      0.10      0.14        10
           3       0.89      1.00      0.94        16

    accuracy                           0.82        77
   macro avg       0.74      0.59      0.62        77
weighted avg       0.78      0.82      0.78        77


--------------------------------------------------------------------------------

Results for Recurred:
Accuracy: 0.974025974025974
Precision: 0.974025974025974
Recall: 0.974025974025974
F1-Score: 0.974025974025974
RMSE: 0.16116459280507606
MAE: 0.025974025974025976

Full Classification Report:
               precision    recall  f1-score   suppo

**5-Support Vector Machines (SVM)**