<a href="https://colab.research.google.com/github/Ronith2205/STML/blob/main/Stml_PROJECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression

# Load the dataset
data = pd.read_csv('/content/Toddler Autism dataset July 2018.csv')

# Display the first few rows to understand structure
print(data.head())

# Handling missing values if any.
# .copy() guarantees the column assignments below write to an independent
# frame rather than to a possible view of the loaded one.
data = data.dropna().copy()

# Encoding categorical variables
# Each object (string) column is replaced by integer codes from its own
# freshly fitted LabelEncoder.
for column in data.select_dtypes(include=['object']).columns:
    data[column] = LabelEncoder().fit_transform(data[column])

# Define features and target variable
# 'Class/ASD Traits ' (note the trailing space in the column name) is the target.
target_column = 'Class/ASD Traits '

# Columns that must not be used as predictors:
#   * 'Case_No' is a running row identifier and carries no information
#     about the toddler.
#   * 'Qchat-10-Score' equals A1 + ... + A10 in the rows printed above, and
#     the class label is reportedly assigned directly from this score, so
#     keeping it leaks the target into the features.
#     NOTE(review): confirm against the dataset description.
non_feature_columns = ['Case_No', 'Qchat-10-Score']

# The target drop stays strict (KeyError if the column is missing); the
# non-feature drop tolerates files that lack those columns.
X = data.drop(columns=[target_column]).drop(columns=non_feature_columns, errors='ignore')
y = data[target_column]

# Splitting the data into training and testing sets (70% / 30%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardizing the features (necessary for SVM and k-NN).
# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

def _fit_predict_report(label, model, X_tr, y_tr, X_te, y_te, threshold=None):
    """Fit ``model``, predict on the test split and print accuracy plus report.

    Parameters
    ----------
    label : str
        Name used as the prefix of the two printed lines.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.
    X_tr, y_tr : training features and labels.
    X_te, y_te : test features and labels.
    threshold : float or None
        When given, the raw predictions are treated as continuous scores and
        converted to 0/1 labels with ``score >= threshold``.

    Returns
    -------
    The (possibly thresholded) predictions for ``X_te``.
    """
    model.fit(X_tr, y_tr)
    predictions = model.predict(X_te)
    if threshold is not None:
        # Regressors return continuous values; binarise them so the
        # classification metrics below can be applied.
        predictions = (predictions >= threshold).astype(int)
    print(f"{label} Accuracy:", accuracy_score(y_te, predictions))
    print(f"{label} Classification Report:\n", classification_report(y_te, predictions))
    return predictions


# The same train/test split is shared by every model below.
_split = (X_train, y_train, X_test, y_test)

# 1. Support Vector Machine
svm_model = SVC()
svm_predictions = _fit_predict_report("SVM", svm_model, *_split)

# 2. k-Nearest Neighbors
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_predictions = _fit_predict_report("k-NN", knn_model, *_split)

# 3. Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_predictions = _fit_predict_report("Decision Tree", dt_model, *_split)

# 4. Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_predictions = _fit_predict_report("Random Forest", rf_model, *_split)

# 5. Linear Regression (predictions converted to binary labels at 0.5)
lr_model = LinearRegression()
lr_predictions = _fit_predict_report("Linear Regression", lr_model, *_split, threshold=0.5)

# 6. Logistic Regression
logistic_model = LogisticRegression(max_iter=200)
logistic_predictions = _fit_predict_report("Logistic Regression", logistic_model, *_split)




   Case_No  A1  A2  A3  A4  A5  A6  A7  A8  A9  A10  Age_Mons  Qchat-10-Score  \
0        1   0   0   0   0   0   0   1   1   0    1        28               3   
1        2   1   1   0   0   0   1   1   0   0    0        36               4   
2        3   1   0   0   0   0   0   1   1   0    1        36               4   
3        4   1   1   1   1   1   1   1   1   1    1        24              10   
4        5   1   1   0   1   1   1   1   1   1    1        20               9   

  Sex       Ethnicity Jaundice Family_mem_with_ASD Who completed the test  \
0   f  middle eastern      yes                  no          family member   
1   m  White European      yes                  no          family member   
2   m  middle eastern      yes                  no          family member   
3   m        Hispanic       no                  no          family member   
4   f  White European       no                 yes          family member   

  Class/ASD Traits   
0                No  
1     

In [4]:
!pip install scikit-learn-extra

Collecting scikit-learn-extra
  Downloading scikit_learn_extra-0.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Downloading scikit_learn_extra-0.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-learn-extra
Successfully installed scikit-learn-extra-0.3.0


In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import seaborn as sns
import graphviz

import matplotlib.pyplot as plt

import pydotplus
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
# KMedoids is not part of scikit-learn itself; it is imported from sklearn_extra, which the scikit-learn-extra package provides.
from sklearn_extra.cluster import KMedoids # Fixed: Importing KMedoids from the correct module



In [15]:
# Load the dataset
df = pd.read_csv('/content/Toddler Autism dataset July 2018.csv')

# Display basic information
print("Dataset Information:")
# DataFrame.info() prints its summary itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line).
df.info()
print("\nFirst Five Rows of the Dataset:")
print(df.head())

# Handle missing values; .copy() so the column assignments below write to an
# independent frame rather than to a possible view.
df = df.dropna().copy()

# Encode categorical variables
# One encoder object is re-fitted per column, so after the loop `le` only
# remembers the classes of the last object column it was fitted on.
le = LabelEncoder()
for col in df.select_dtypes(include='object').columns:
    df[col] = le.fit_transform(df[col])

# Split the data into features (X) and target (y)
# Besides the target, two columns are excluded from the features:
#   * 'Case_No' is a running row identifier.
#   * 'Qchat-10-Score' equals A1 + ... + A10 in the rows printed above, and
#     the class label is reportedly assigned directly from this score, so
#     keeping it leaks the target into the features.
#     NOTE(review): confirm against the dataset description.
# The target drop stays strict; the extra drop tolerates missing columns.
X = df.drop(columns=['Class/ASD Traits ']).drop(
    columns=['Case_No', 'Qchat-10-Score'], errors='ignore'
)
y = df['Class/ASD Traits ']

# Train-test split (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
def evaluate_model(y_true, y_pred, model_name):
    """
    Score binary predictions, print a short report and return the scores.

    The confusion matrix, accuracy, precision, recall and F1 are computed
    from ``y_true`` / ``y_pred``; precision, recall and F1 use binary
    averaging with label ``1`` as the positive class.

    Returns ``[model_name, accuracy, precision, recall, f1]``.
    """
    matrix = confusion_matrix(y_true, y_pred)

    # Shared keyword arguments for the three positive-class metrics.
    binary_kwargs = {"average": "binary", "pos_label": 1}
    scores = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, **binary_kwargs),
        "Recall": recall_score(y_true, y_pred, **binary_kwargs),
        "F1 Score": f1_score(y_true, y_pred, **binary_kwargs),
    }

    report_lines = [f"\nModel: {model_name}", f"Confusion Matrix:\n{matrix}"]
    report_lines.extend(f"{metric}: {value:.4f}" for metric, value in scores.items())
    # The trailing "\n" reproduces the blank line that follows the F1 score.
    print("\n".join(report_lines) + "\n")

    return [model_name, *scores.values()]
# Clustering with K-Medoids (unsupervised: fitted on the features only)
kmedoids = KMedoids(n_clusters=2, random_state=42)
kmedoids.fit(X_train)

# Assign each test sample to its nearest medoid
test_clusters = kmedoids.predict(X_test)

# Map clusters to actual labels.
# Cluster ids are arbitrary, so a fixed "cluster 0 -> class k" rule can be
# silently inverted from one fit to the next. Instead, each cluster is given
# the majority class of the *training* samples assigned to it.
train_clusters = kmedoids.labels_
train_labels = np.asarray(y_train)
cluster_to_class = {}
for cluster_id in np.unique(train_clusters):
    classes, counts = np.unique(train_labels[train_clusters == cluster_id], return_counts=True)
    cluster_to_class[cluster_id] = classes[np.argmax(counts)]

y_pred_kmedoids = np.array([cluster_to_class[cluster_id] for cluster_id in test_clusters])

# Evaluate K-Medoids
kmedoids_results = evaluate_model(y_test, y_pred_kmedoids, "K-Medoids")

# Each classifier below is fitted on the training split, used to predict the
# test split, and scored with evaluate_model(); fit() returns the estimator
# itself, so fitting and predicting are chained.

# k-nearest neighbours with k = 5
knn = KNeighborsClassifier(n_neighbors=5)
y_pred_knn = knn.fit(X_train, y_train).predict(X_test)
knn_results = evaluate_model(y_test, y_pred_knn, "KNN")

# Decision tree (seeded for repeatable results)
dt = DecisionTreeClassifier(random_state=42)
y_pred_dt = dt.fit(X_train, y_train).predict(X_test)
dt_results = evaluate_model(y_test, y_pred_dt, "Decision Tree")

# Random forest (seeded for repeatable results)
rf = RandomForestClassifier(random_state=42)
y_pred_rf = rf.fit(X_train, y_train).predict(X_test)
rf_results = evaluate_model(y_test, y_pred_rf, "Random Forest")

# Support vector machine with a linear kernel
svm = SVC(kernel='linear', random_state=42)
y_pred_svm = svm.fit(X_train, y_train).predict(X_test)
svm_results = evaluate_model(y_test, y_pred_svm, "SVM")
# Gather the per-model result rows ([name, accuracy, precision, recall, f1])
results = [kmedoids_results, knn_results, dt_results, rf_results, svm_results]

# One row per model, one column per metric
comparison_table = pd.DataFrame(
    data=results,
    columns=["Model", "Accuracy", "Precision", "Recall", "F1 Score"],
)

# Heading and table are written on consecutive lines
print("\nComparison Table:", comparison_table, sep="\n")



Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1054 entries, 0 to 1053
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Case_No                 1054 non-null   int64 
 1   A1                      1054 non-null   int64 
 2   A2                      1054 non-null   int64 
 3   A3                      1054 non-null   int64 
 4   A4                      1054 non-null   int64 
 5   A5                      1054 non-null   int64 
 6   A6                      1054 non-null   int64 
 7   A7                      1054 non-null   int64 
 8   A8                      1054 non-null   int64 
 9   A9                      1054 non-null   int64 
 10  A10                     1054 non-null   int64 
 11  Age_Mons                1054 non-null   int64 
 12  Qchat-10-Score          1054 non-null   int64 
 13  Sex                     1054 non-null   object
 14  Ethnicity               1054 non-nu

In [11]:
# Preview the first five rows of df; as the cell's last expression the frame
# is shown with the notebook's rich display.
df.head(n=5)

Unnamed: 0,Case_No,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,Age_Mons,Qchat-10-Score,Sex,Ethnicity,Jaundice,Family_mem_with_ASD,Who completed the test,Class/ASD Traits
0,1,0,0,0,0,0,0,1,1,0,1,28,3,0,8,1,0,4,0
1,2,1,1,0,0,0,1,1,0,0,0,36,4,1,5,1,0,4,1
2,3,1,0,0,0,0,0,1,1,0,1,36,4,1,8,1,0,4,1
3,4,1,1,1,1,1,1,1,1,1,1,24,10,1,0,0,0,4,1
4,5,1,1,0,1,1,1,1,1,1,1,20,9,0,5,0,1,4,1
