In [8]:
# KNN classification: predict the final grade column 'G3' with k-nearest
# neighbours, report test accuracy, then predict for one hand-built example row.
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import gdown

# Download the dataset from Google Drive into a local CSV.
file_id = "1xiuH2gH0kb2CCNgVW6Alw5_sS_AD7KeU"
url = f"https://drive.google.com/uc?export=download&id={file_id}"
output = 'student-mat.csv'
# Only hit the network when the file is not already on disk, so re-running the
# cell is cheap and works offline. Delete the local file to force a refresh.
if not os.path.exists(output):
    downloaded = gdown.download(url, output, quiet=False)
    # Some gdown versions return None on failure instead of raising; fail
    # loudly here rather than letting read_csv hit a missing file.
    if downloaded is None:
        raise RuntimeError(f"Failed to download dataset from {url}")

# Load the dataset (this copy is comma-separated).
df = pd.read_csv(output, sep=',')

# Assuming the target column is 'G3' (final grade)
target_column = 'G3'

# One-hot encode every categorical column; drop_first=True drops the first
# level of each so the remaining dummies are not redundant.
df = pd.get_dummies(df, drop_first=True)

# The target only survives get_dummies under its own name if it was not
# treated as categorical, so check before using it.
if target_column in df.columns:
    # Split the data into features and target variable
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    # Fit the scaler on the training split only, then reuse it for the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the KNN model
    # NOTE(review): each distinct G3 value is treated as its own class, so the
    # accuracy below only counts exact grade matches (the recorded run scored
    # about 0.10). A regressor or binned grades may suit this target better.
    knn_model = KNeighborsClassifier(n_neighbors=5)
    knn_model.fit(X_train_scaled, y_train)

    # Make predictions
    y_pred = knn_model.predict(X_test_scaled)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Example: predicting for a new example person. Keys must use the
    # post-get_dummies column names (e.g. 'sex_M', 'Mjob_other').
    example_data = {
        'age': [18],
        'Medu': [4],
        'Fedu': [4],
        'traveltime': [1],
        'studytime': [2],
        'failures': [0],
        'famrel': [4],
        'freetime': [3],
        'goout': [4],
        'Dalc': [1],
        'Walc': [1],
        'health': [3],
        'absences': [6],
        'G1': [5],
        'G2': [6],
        'school_MS': [0],
        'sex_M': [1],
        'address_U': [1],
        'famsize_LE3': [0],
        'Pstatus_T': [1],
        'Mjob_health': [0],
        'Mjob_other': [1],
        'Mjob_services': [0],
        'Mjob_teacher': [0],
        'Fjob_health': [0],
        'Fjob_other': [1],
        'Fjob_services': [0],
        'Fjob_teacher': [0],
        'reason_home': [0],
        'reason_other': [0],
        'reason_reputation': [0],
        'guardian_mother': [1],
        'guardian_other': [0],
        'schoolsup_yes': [0],
        'famsup_yes': [1],
        'paid_yes': [0],
        'activities_yes': [1],
        'nursery_yes': [1],
        'higher_yes': [1],
        'internet_yes': [0],
        'romantic_yes': [0]
    }

    example_person = pd.DataFrame(example_data)

    # Align to the training columns (same names, same order). Columns missing
    # from example_data are filled with 0 and unknown keys are dropped, so a
    # misspelled key is silently ignored.
    example_person = example_person.reindex(columns=X_train.columns, fill_value=0)

    # Scale the example person's features with the scaler fitted on the training data
    example_person_scaled = scaler.transform(example_person)

    # Predict the final grade for the example person
    grade_prediction = knn_model.predict(example_person_scaled)
    print("Predicted final grade for the example person:", grade_prediction[0])
else:
    print(f"The target column '{target_column}' does not exist in the dataset after preprocessing.")



Downloading...
From: https://drive.google.com/uc?export=download&id=1xiuH2gH0kb2CCNgVW6Alw5_sS_AD7KeU
To: /content/student-mat.csv
100%|██████████| 42.0k/42.0k [00:00<00:00, 51.7MB/s]

Accuracy: 0.10126582278481013
Predicted final grade for the example person: 9





In [13]:
# Logistic regression: predict the final grade column 'G3', report test
# accuracy, then predict for one hand-built example row.
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import gdown

# Download the dataset from Google Drive into a local CSV.
file_id = "1xiuH2gH0kb2CCNgVW6Alw5_sS_AD7KeU"
url = f"https://drive.google.com/uc?export=download&id={file_id}"
output = 'student-mat.csv'
# Skip the download when the CSV is already present so the cell can be re-run
# cheaply and offline. Remove the local file to fetch a fresh copy.
if not os.path.exists(output):
    downloaded = gdown.download(url, output, quiet=False)
    # Some gdown versions signal failure by returning None rather than
    # raising; stop here instead of failing later inside read_csv.
    if downloaded is None:
        raise RuntimeError(f"Failed to download dataset from {url}")

# Load the dataset; this copy uses a comma separator.
df = pd.read_csv(output, sep=',')


# Assuming the target column is 'G3' (final grade)
target_column = 'G3'

# One-hot encode categorical columns, dropping the first level of each.
df = pd.get_dummies(df, drop_first=True)

# Proceed only if 'G3' is still a column after the encoding step.
if target_column in df.columns:
    # Split the data into features and target variable
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # 80/20 train/test split with a fixed seed for a repeatable partition.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    # Standardise features: statistics come from the training split only.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the Logistic Regression model; max_iter is raised above the
    # library default to give the solver more iterations to converge.
    # NOTE(review): every distinct G3 value becomes a separate class, so the
    # accuracy below rewards only exact grade matches (the recorded run scored
    # about 0.23). Consider regression or binned grades for this target.
    logreg_model = LogisticRegression(max_iter=1000)
    logreg_model.fit(X_train_scaled, y_train)

    # Make predictions
    y_pred = logreg_model.predict(X_test_scaled)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Example: predicting for a new example person. Keys are the encoded
    # column names produced by get_dummies above.
    example_data = {
        'age': [18],
        'Medu': [4],
        'Fedu': [4],
        'traveltime': [1],
        'studytime': [2],
        'failures': [0],
        'famrel': [4],
        'freetime': [3],
        'goout': [4],
        'Dalc': [1],
        'Walc': [1],
        'health': [3],
        'absences': [6],
        'G1': [5],
        'G2': [6],
        'school_MS': [0],
        'sex_M': [1],
        'address_U': [1],
        'famsize_LE3': [0],
        'Pstatus_T': [1],
        'Mjob_health': [0],
        'Mjob_other': [1],
        'Mjob_services': [0],
        'Mjob_teacher': [0],
        'Fjob_health': [0],
        'Fjob_other': [1],
        'Fjob_services': [0],
        'Fjob_teacher': [0],
        'reason_home': [0],
        'reason_other': [0],
        'reason_reputation': [0],
        'guardian_mother': [1],
        'guardian_other': [0],
        'schoolsup_yes': [0],
        'famsup_yes': [1],
        'paid_yes': [0],
        'activities_yes': [1],
        'nursery_yes': [1],
        'higher_yes': [1],
        'internet_yes': [0],
        'romantic_yes': [0]
    }

    example_person = pd.DataFrame(example_data)

    # Match the training column set and order. Absent columns become 0 and
    # keys not present in the training data are discarded without warning.
    example_person = example_person.reindex(columns=X_train.columns, fill_value=0)

    # Apply the scaler that was fitted on the training data
    example_person_scaled = scaler.transform(example_person)

    # Predict the final grade for the example person
    grade_prediction = logreg_model.predict(example_person_scaled)
    print("Predicted final grade for the example person:", grade_prediction[0])
else:
    print(f"The target column '{target_column}' does not exist in the dataset after preprocessing.")


Downloading...
From: https://drive.google.com/uc?export=download&id=1xiuH2gH0kb2CCNgVW6Alw5_sS_AD7KeU
To: /content/student-mat.csv
100%|██████████| 42.0k/42.0k [00:00<00:00, 56.0MB/s]

Accuracy: 0.22784810126582278
Predicted final grade for the example person: 9





In [12]:
# SVM (RBF kernel): predict the final grade column 'G3', report test accuracy,
# then predict for one hand-built example row.
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import gdown

# Download the dataset from Google Drive into a local CSV.
file_id = "1xiuH2gH0kb2CCNgVW6Alw5_sS_AD7KeU"
url = f"https://drive.google.com/uc?export=download&id={file_id}"
output = 'student-mat.csv'
# Reuse an existing local copy instead of downloading on every run; delete the
# file if the remote data needs to be fetched again.
if not os.path.exists(output):
    downloaded = gdown.download(url, output, quiet=False)
    # A None return is how some gdown versions report a failed download;
    # surface it immediately rather than as a confusing read_csv error.
    if downloaded is None:
        raise RuntimeError(f"Failed to download dataset from {url}")

# Load the dataset; this copy uses a comma separator.
df = pd.read_csv(output, sep=',')


# Assuming the target column is 'G3' (final grade)
target_column = 'G3'

# One-hot encode categorical columns, dropping the first level of each.
df = pd.get_dummies(df, drop_first=True)

# Continue only when 'G3' is still present after encoding.
if target_column in df.columns:
    # Split the data into features and target variable
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # Reserve 20% of rows for evaluation; random_state pins the shuffle.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    # Scale features using statistics learned from the training rows only.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the SVM model
    # NOTE(review): G3 values are used as unordered class labels, so accuracy
    # counts exact grade matches only (the recorded run scored about 0.20).
    # A regressor or binned grades may be a better fit for this target.
    svm_model = SVC(kernel='rbf', C=1.0, gamma='scale')  # kernel, C and gamma are the tunable knobs
    svm_model.fit(X_train_scaled, y_train)

    # Make predictions
    y_pred = svm_model.predict(X_test_scaled)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Example: predicting for a new example person, expressed in the encoded
    # column names that get_dummies produced.
    example_data = {
        'age': [18],
        'Medu': [4],
        'Fedu': [4],
        'traveltime': [1],
        'studytime': [2],
        'failures': [0],
        'famrel': [4],
        'freetime': [3],
        'goout': [4],
        'Dalc': [1],
        'Walc': [1],
        'health': [3],
        'absences': [6],
        'G1': [5],
        'G2': [6],
        'school_MS': [0],
        'sex_M': [1],
        'address_U': [1],
        'famsize_LE3': [0],
        'Pstatus_T': [1],
        'Mjob_health': [0],
        'Mjob_other': [1],
        'Mjob_services': [0],
        'Mjob_teacher': [0],
        'Fjob_health': [0],
        'Fjob_other': [1],
        'Fjob_services': [0],
        'Fjob_teacher': [0],
        'reason_home': [0],
        'reason_other': [0],
        'reason_reputation': [0],
        'guardian_mother': [1],
        'guardian_other': [0],
        'schoolsup_yes': [0],
        'famsup_yes': [1],
        'paid_yes': [0],
        'activities_yes': [1],
        'nursery_yes': [1],
        'higher_yes': [1],
        'internet_yes': [0],
        'romantic_yes': [0]
    }

    example_person = pd.DataFrame(example_data)

    # Conform to the training columns and their order. Columns not supplied
    # above default to 0; keys unknown to the training data are dropped silently.
    example_person = example_person.reindex(columns=X_train.columns, fill_value=0)

    # Transform with the scaler fitted on the training data
    example_person_scaled = scaler.transform(example_person)

    # Predict the final grade for the example person
    grade_prediction = svm_model.predict(example_person_scaled)
    print("Predicted final grade for the example person:", grade_prediction[0])
else:
    print(f"The target column '{target_column}' does not exist in the dataset after preprocessing.")


Downloading...
From: https://drive.google.com/uc?export=download&id=1xiuH2gH0kb2CCNgVW6Alw5_sS_AD7KeU
To: /content/student-mat.csv
100%|██████████| 42.0k/42.0k [00:00<00:00, 33.7MB/s]

Accuracy: 0.20253164556962025
Predicted final grade for the example person: 0



