<a href="https://colab.research.google.com/github/Jasobantapadhan/ALL_INDIA_HACKATHON/blob/main/Machinelearn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Basic Libraries
import numpy as np
import pandas as pd
import cv2
import os
import matplotlib.pyplot as plt

# Sklearn Libraries
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Ignore Warnings
import warnings
warnings.filterwarnings('ignore')


In [4]:
data_dir = "path_to_aptos2019"  # Remove trailing slash if needed
image_folder = os.path.join(data_dir, "train_images")
csv_path = os.path.join(data_dir, "train.csv")

# Verify if the file exists
if not os.path.exists(csv_path):
    print("CSV file not found. Check the file path.")
else:
    df = pd.read_csv(csv_path)
    print(df.head())


CSV file not found. Check the file path.


In [None]:
# Define image size and classes
IMG_SIZE = 128  # Resize all images to 128x128

# Load images and labels
def load_data(image_folder, df):
    data = []
    labels = []

    for index, row in df.iterrows():
        img_path = os.path.join(image_folder, row['id_code'] + ".png")
        if os.path.exists(img_path):
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))  # Resize image
            data.append(img.flatten())  # Flatten to 1D array
            labels.append(row['diagnosis'])

    return np.array(data), np.array(labels)

# Load dataset
X, y = load_data(image_folder, df)
print(f"Dataset loaded successfully! Shape: {X.shape}")


In [None]:
# Normalize pixel values to [0, 1]
X = X / 255.0

# Split dataset into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")


In [None]:
# Apply PCA for dimensionality reduction
pca = PCA(n_components=100)  # Keep 100 principal components
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

print(f"Original shape: {X_train.shape}, Transformed shape: {X_train_pca.shape}")


In [None]:
# Initialize Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on PCA-transformed data
rf_model.fit(X_train_pca, y_train)

# Predict on test data
y_pred = rf_model.predict(X_test_pca)


In [None]:
# Evaluate performance
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nAccuracy Score: ", accuracy_score(y_test, y_pred))


In [None]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

# SVM Model
svm_model = SVC(kernel='rbf', C=1.0, random_state=42)
svm_model.fit(X_train_pca, y_train)
svm_pred = svm_model.predict(X_test_pca)
print("\nSVM Accuracy:", accuracy_score(y_test, svm_pred))

# KNN Model
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_pca, y_train)
knn_pred = knn_model.predict(X_test_pca)
print("\nKNN Accuracy:", accuracy_score(y_test, knn_pred))

# Logistic Regression Model
log_model = LogisticRegression(max_iter=1000, random_state=42)
log_model.fit(X_train_pca, y_train)
log_pred = log_model.predict(X_test_pca)
print("\nLogistic Regression Accuracy:", accuracy_score(y_test, log_pred))


In [5]:
# Plot Confusion Matrix
import seaborn as sns
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.title('Confusion Matrix - Random Forest')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()


NameError: name 'y_test' is not defined