<a href="https://colab.research.google.com/github/DESAI-SREENIJA/ASSIGNMENT-1-MLTT/blob/main/Student_performance_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
import os

# Extract the dataset
zip_path = "/content/student.zip"
extract_path = "/mnt/data/"
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# Find the extracted CSV file.
# os.listdir() returns entries in arbitrary order, so sort the candidates to
# make the selected file deterministic when the directory holds several CSVs.
csv_files = sorted(
    name for name in os.listdir(extract_path) if name.endswith(".csv")
)
if not csv_files:
    # Fail with a clear message instead of a NameError on csv_path below.
    raise FileNotFoundError(
        f"No .csv file found in {extract_path!r} after extracting {zip_path!r}"
    )
csv_path = os.path.join(extract_path, csv_files[0])

# Load the dataset.
# The file is semicolon-delimited; with the default comma separator every row
# is parsed into a single column and column lookups such as 'G3' fail.
df = pd.read_csv(csv_path, sep=";")

# Quick overview of the loaded frame: a preview of the leading rows, the
# per-column count of missing values, the column dtypes and the overall shape.
preview_rows = df.head()
print(preview_rows)

# Missing values per column (heading and table printed on separate lines)
missing_per_column = df.isna().sum()
print("Missing Values: ", missing_per_column, sep="\n")

# Column data types
print("\nColumn Data Types:", df.dtypes, sep="\n")

# Dataset size as (rows, columns)
dataset_shape = df.shape
print("\nDataset Shape:", dataset_shape)

# Data Cleaning
# Ensure 'G3' column exists and is numeric.
# This runs BEFORE the median fill so that any NaN produced by coercing an
# unparseable grade is imputed too, and so that a 'G3' column that arrived as
# text is included in the numeric-only median computation.
if 'G3' in df.columns:
    df['G3'] = pd.to_numeric(df['G3'], errors='coerce')
else:
    print("Error: 'G3' column not found in dataset.")

# Handle missing values by replacing them with each numeric column's median.
# Non-numeric columns are left untouched (numeric_only=True).
df.fillna(df.median(numeric_only=True), inplace=True)

# Remove duplicate rows
df.drop_duplicates(inplace=True)

# Data Analysis
# Every statistic below is derived from the final grade, so nothing is
# computed when the 'G3' column is absent.
if 'G3' not in df.columns:
    print("Error: Unable to perform analysis due to missing 'G3' column.")
else:
    final_grades = df['G3']

    # 1. Mean of the final grade (G3)
    avg_g3 = final_grades.mean()
    print("\nAverage Final Grade (G3):", avg_g3)

    # 2. How many students have a final grade strictly greater than 15
    students_above_15 = final_grades.gt(15).sum()
    print("\nNumber of students scoring above 15 in G3:", students_above_15)

    # 3. Correlation between study time and G3: the off-diagonal entry of
    #    the 2x2 correlation matrix of the two columns
    if 'studytime' not in df.columns:
        print("Error: 'studytime' column not found in dataset.")
    else:
        corr_matrix = df[['studytime', 'G3']].corr()
        correlation = corr_matrix.iloc[0, 1]
        print("\nCorrelation between study time and G3:", correlation)

    # 4. Mean G3 for each value of the 'sex' column
    if 'sex' not in df.columns:
        print("Error: 'sex' column not found in dataset.")
    else:
        grades_by_gender = df.groupby('sex')['G3']
        avg_g3_gender = grades_by_gender.mean()
        print("\nAverage Final Grade by Gender:")
        print(avg_g3_gender)

# Data Visualization
# Draws up to three panels side by side on one figure. A panel is only added
# when the column it needs is present; without 'G3' nothing is drawn at all.
if 'G3' not in df.columns:
    print("Skipping visualization due to missing 'G3' column.")
else:
    fig = plt.figure(figsize=(12, 4))

    # Panel 1: histogram (with KDE overlay) of the non-missing final grades
    hist_ax = fig.add_subplot(1, 3, 1)
    sns.histplot(df['G3'].dropna(), bins=10, kde=True, ax=hist_ax)
    hist_ax.set_xlabel('Final Grade (G3)')
    hist_ax.set_title('Distribution of Final Grades')

    # Panel 2: scatter of study time against final grade
    if 'studytime' in df.columns:
        scatter_ax = fig.add_subplot(1, 3, 2)
        sns.scatterplot(x=df['studytime'], y=df['G3'], ax=scatter_ax)
        scatter_ax.set_xlabel('Study Time (hours per week)')
        scatter_ax.set_ylabel('Final Grade (G3)')
        scatter_ax.set_title('Study Time vs Final Grade')

    # Panel 3: bar chart of the per-gender mean G3 computed in the analysis
    # step (avg_g3_gender exists whenever both 'G3' and 'sex' are present)
    if 'sex' in df.columns:
        bar_ax = fig.add_subplot(1, 3, 3)
        sns.barplot(x=avg_g3_gender.index, y=avg_g3_gender.values, ax=bar_ax)
        bar_ax.set_xlabel('Gender')
        bar_ax.set_ylabel('Average Final Grade (G3)')
        bar_ax.set_title('Average G3 by Gender')

    fig.tight_layout()
    plt.show()


  school;sex;age;address;famsize;Pstatus;Medu;Fedu;Mjob;Fjob;reason;guardian;traveltime;studytime;failures;schoolsup;famsup;paid;activities;nursery;higher;internet;romantic;famrel;freetime;goout;Dalc;Walc;health;absences;G1;G2;G3
0  GP;"F";18;"U";"GT3";"A";4;4;"at_home";"teacher...                                                                                                                                                                                 
1  GP;"F";17;"U";"GT3";"T";1;1;"at_home";"other";...                                                                                                                                                                                 
2  GP;"F";15;"U";"LE3";"T";1;1;"at_home";"other";...                                                                                                                                                                                 
3  GP;"F";15;"U";"GT3";"T";4;2;"health";"services...                            