In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
from sklearn.metrics import confusion_matrix
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras.applications.xception import Xception, preprocess_input, decode_predictions
import keras


# Data Loading

### Loading the dataset information file


In [None]:
# Load the label CSV (semicolon-separated, UTF-8 encoded).
train_label = pd.read_csv(
    '/kaggle/input/jraigs-dataset/justRAIGS/JustRAIGS_Train_labels.csv',
    sep=";",
    encoding="utf-8",
)

In [None]:
# Transpose the first five rows so each column is shown as a row.
train_label.head().T

In [None]:
# Working handle for the label table; `train_label` keeps referring to the loaded CSV.
labels=pd.DataFrame(train_label)

In [None]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns',None)

In [None]:
# Seeded so the displayed sample is the same on every Restart & Run All
# (42 matches the random_state used for the train/test split below).
labels.sample(100, random_state=42)

In [None]:
# Total number of missing cells, summed over every column.
print('Missing values in the dataset : ',labels.isna().sum().sum())

In [None]:
# Missing values in the 'Age' column only (the previous message wrongly said "dataset").
print('Missing values in the Age column : ', labels['Age'].isna().sum())

In [None]:
# Number of rows that exactly repeat an earlier row.
print('Duplicate values in the dataset : ',labels.duplicated().sum())

In [None]:
# Column dtypes and non-null counts.
labels.info()

In [None]:
# Column names of the label table.
labels.columns

In [None]:
# Summary statistics, transposed so each described column is one row.
labels.describe().T

In [None]:
# Summary statistics restricted to the object-dtype columns.
labels.describe(include='O')

## Loading Image Data

In [None]:
# Full list of image folders, kept for reference only: this bare string literal
# is never assigned, so it does not define `directories`.
'''
directories = [
    '/kaggle/input/jraigs-dataset/justRAIGS/0/0',
    '/kaggle/input/jraigs-dataset/justRAIGS/1',
    '/kaggle/input/jraigs-dataset/justRAIGS/2',
    '/kaggle/input/jraigs-dataset/justRAIGS/3',
    '/kaggle/input/jraigs-dataset/justRAIGS/4',
    '/kaggle/input/jraigs-dataset/justRAIGS/5'
]
'''

In [None]:
# Image folders to scan; only folder 5 is used here.
directories = ['/kaggle/input/jraigs-dataset/justRAIGS/5']

In [None]:
# Accumulators filled by the image-loading loop: the opened images and an
# initially empty frame with the same columns as `labels`.
all_images = [] 
extracted_df = pd.DataFrame(columns=labels.columns)

In [None]:
# Open every image in `directories` and keep the label rows whose 'Eye ID'
# matches an image file name (without extension).
#
# The IDs are indexed once in a set so each lookup is O(1), and the matching
# rows are selected in a single pass at the end instead of growing a DataFrame
# with pd.concat inside the loop (quadratic, and dependent on how pandas
# treats the empty seed frame's dtypes). Rebuilding `extracted_df` from
# scratch also keeps this cell idempotent when it is re-run.
known_eye_ids = set(labels['Eye ID'])
matched_eye_ids = []

for directory in directories:
    for image_file in os.listdir(directory):
        if image_file.endswith('.db'):
            continue  # skip '.db' files found alongside the images
        image_path = os.path.join(directory, image_file)
        try:
            # Named `img` (not `image`) so the `keras.preprocessing.image`
            # module imported at the top of the notebook is not shadowed.
            img = Image.open(image_path)
        except Exception as e:
            # Best effort: report unreadable files and carry on.
            print(f"Error loading image: {image_path} - {e}")
            continue
        all_images.append(img)
        image_name = os.path.splitext(image_file)[0]
        if image_name in known_eye_ids:
            matched_eye_ids.append(image_name)

# Rows come back in the label table's own order, each matching row once.
extracted_df = labels[labels['Eye ID'].isin(matched_eye_ids)].reset_index(drop=True)

In [None]:
# From here on `labels` refers to the rows matched to image files;
# `train_label` still refers to the table loaded from the CSV.
labels=extracted_df

In [None]:
# Preview the first few loaded images side by side.
num_samples = 5
fig, axes = plt.subplots(1, num_samples, figsize=(20,10))

for position, ax in enumerate(axes):
    # Draw an image when one is available for this panel ...
    if position < len(all_images):
        ax.imshow(all_images[position])
    # ... and hide the axes in every case (this also blanks unused panels).
    ax.axis('off')

plt.tight_layout()
plt.show()

# Exploratory Data Analysis

In [None]:
# Number of rows per distinct 'Final Label' value.
label_counts = labels['Final Label'].value_counts()
print(label_counts)

### Data Visualization

In [None]:
# Bar chart of the 'Final Label' counts, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
label_counts.plot(kind='bar', ax=ax)
ax.set_title('Distribution of Final Labels')
ax.set_xlabel('Labels')
ax.set_ylabel('Count')
ax.tick_params(axis='x', labelrotation=0)  # keep the class names horizontal
fig.tight_layout()
plt.show()

In [None]:
# Split the label table by final diagnosis string ('NRG' / 'RG').
labels_nrg = labels.loc[labels['Final Label'].eq('NRG')]
labels_rg = labels.loc[labels['Final Label'].eq('RG')]

In [None]:
# Number of rows per distinct 'Age' value in the NRG subset.
grouped_counts_nrg = labels_nrg.groupby('Age').size()

# Same count for the RG subset.
grouped_counts_rg = labels_rg.groupby('Age').size()

In [None]:
# Age histogram for the NRG subset, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(9, 5))
bars_nrg = ax.bar(grouped_counts_nrg.index, grouped_counts_nrg.values)

ax.set_xlabel('Age')
ax.set_ylabel('Count')
ax.set_title('Age vs. Count (NRG)')
ax.tick_params(axis='x', labelrotation=90)  # vertical tick labels for readability
plt.show()

In [None]:
# Age histogram for the RG subset, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(9, 5))
bars_rg = ax.bar(grouped_counts_rg.index, grouped_counts_rg.values)

ax.set_xlabel('Age')
ax.set_ylabel('Count')
ax.set_title('Age vs. Count (RG)')
ax.tick_params(axis='x', labelrotation=90)  # vertical tick labels for readability
plt.show()

In [None]:
# Keep a handle on the column index of the filtered label table.
columns = labels.columns

In [None]:
# Show the column names captured above.
print(columns)

### Correlation Matrix

In [None]:
# Convert labels to categorical variables.
#
# All four columns share one explicitly ordered category list, so the integer
# codes taken from `.cat.codes` afterwards mean the same thing everywhere:
# NRG -> 0, RG -> 1, U -> 2, NA -> 3. Letting pandas infer the categories per
# column makes the code of a label depend on which other values happen to be
# present in that column (e.g. NRG becomes 1 as soon as the column has an 'NA').
label_categories = ['NRG', 'RG', 'U', 'NA']
for label_column, fill_missing in [
    ('Final Label', False),  # missing final labels stay missing (code -1)
    ('Label G1', True),
    ('Label G2', True),
    ('Label G3', True),
]:
    label_values = labels[label_column]
    if fill_missing:
        # Grader columns mark an absent grade with the explicit 'NA' category.
        label_values = label_values.fillna('NA')
    if pd.api.types.is_numeric_dtype(label_values):
        # Column is already integer-encoded (this cell was re-run after the
        # encoding step): fall back to the plain conversion used previously.
        labels[label_column] = label_values.astype('category')
        continue
    # Unknown labels are appended after the known ones, so they can never
    # shift the codes of NRG / RG / U / NA.
    extra_categories = sorted(
        set(label_values.dropna().unique()) - set(label_categories), key=str
    )
    labels[label_column] = label_values.astype(
        pd.CategoricalDtype(categories=label_categories + extra_categories)
    )


In [None]:
# Columns that were converted to the category dtype above.
categ_columns=['Final Label', 'Label G1', 'Label G2', 'Label G3']

In [None]:
# Replace each categorical column by its integer category codes
# (pandas uses -1 for missing values), then confirm the resulting dtypes.
labels[categ_columns] = labels[categ_columns].apply(lambda x: x.cat.codes)
labels[categ_columns].info()

In [None]:
# Identifier columns carry no signal for the correlation analysis.
elim_columns=['Eye ID','Fellow Eye ID']
# errors='ignore' mirrors the original mask-based selection, which simply
# kept everything when an identifier column was absent.
corr_columns = labels.drop(columns=elim_columns, errors='ignore')
corr_columns.columns

In [None]:
# Select from `labels` the columns retained in `corr_columns`.
correlation_df = labels[corr_columns.columns]

In [None]:
correlation_df.head()
# Author's note on the intended label encoding:
#   RG  -> 1.0
#   NRG -> 0.0
#   U   -> 2.0
# NOTE(review): the actual codes come from the category order set up in the
# conversion cell above — confirm they match this note for every label column.

In [None]:
# Check the dtypes of the encoded label columns before computing correlations.
correlation_df[categ_columns].info()

In [None]:
# Pairwise correlation between all retained columns (pandas default method).
corr_matrix=correlation_df.corr()

In [None]:
# Hide the upper triangle (diagonal included): the matrix is symmetric, so
# the lower triangle already shows every pair once.
mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
fig, ax = plt.subplots(figsize=(25, 20))
sns.heatmap(
    corr_matrix,
    mask=mask,
    annot=True,
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title('Correlation Matrix Heatmap')
ax.tick_params(axis='x', labelrotation=90)
ax.tick_params(axis='y', labelrotation=0)
fig.tight_layout()
plt.show()

## Feature Selection

In [None]:
# Using Correlation Matrix: build the feature table without the columns
# listed below. `drop` returns a new frame, so `labels` is left untouched.
select_df = labels.drop(
    columns=[
        'G3 DH', 'G2 DH', 'G1 DH',
        'G3 RNFLDS', 'G3 RNFLDI',
        'G2 RNFLDS', 'G2 RNFLDI',
        'G1 RNFLDS', 'G1 RNFLDI',
        'G1 BCLVS', 'G1 BCLVI',
        'G2 BCLVS', 'G2 BCLVI',
        'G3 BCLVS', 'G3 BCLVI',
    ]
)

In [None]:
# Preview the table after the column drop.
select_df.head()

## Data Splitting

### Tabular Data

In [None]:
# Features are every column except the target; the target is 'Final Label'.
X = select_df.drop(columns=['Final Label'])
y = select_df['Final Label']

In [None]:
# Feature column names ('Final Label' excluded).
X.columns

In [None]:
# Preview the feature table.
X.head()

In [None]:
# Preview the target column.
y.head()

In [None]:
# 80 % train / 20 % hold-out, then the hold-out is halved into validation and
# test (10 % each). Both splits are seeded for reproducibility.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=42
)

### Image Data

In [None]:
# Source folder holding the images.
image_directory = '/kaggle/input/jraigs-dataset/justRAIGS/5'
# Per-split destination folders.
train_directory, test_directory, valid_directory = (
    '/path/to/train',
    '/path/to/test',
    '/path/to/valid',
)

In [None]:
# Sanity check before filtering: does '<Eye ID>.jpg' exist for the first train rows?
for image_id in X_train['Eye ID'].head():  # Print the first few entries for debugging
    src_path = os.path.join('/kaggle/input/jraigs-dataset/justRAIGS/5', image_id + '.jpg')
    print(f"Checking {src_path}: Exists - {os.path.exists(src_path)}")

In [None]:
# Drop rows related to non-existent .jpg files (train subset).
# One existence flag per row, in row order; a plain bool array keeps the
# filtering positional and also works when the subset is empty.
has_image = np.array(
    [
        os.path.exists(
            os.path.join('/kaggle/input/jraigs-dataset/justRAIGS/5', image_id + '.jpg')
        )
        for image_id in X_train['Eye ID']
    ],
    dtype=bool,
)
# Index labels of the rows being removed (kept for inspection).
rows_to_drop = X_train.index[~has_image].tolist()

# Filter the targets with the very same mask: dropping rows from X_train only
# would leave X_train and y_train with different lengths and row order.
y_train = y_train[has_image].reset_index(drop=True)
X_train = X_train[has_image].reset_index(drop=True)

In [None]:
# Same check after filtering the train subset.
for image_id in X_train['Eye ID'].head():  # Print the first few entries for debugging
    src_path = os.path.join('/kaggle/input/jraigs-dataset/justRAIGS/5', image_id + '.jpg')
    print(f"Checking {src_path}: Exists - {os.path.exists(src_path)}")

In [None]:
# Sanity check before filtering: does '<Eye ID>.jpg' exist for the first validation rows?
for image_id in X_valid['Eye ID'].head():  # Print the first few entries for debugging
    src_path = os.path.join('/kaggle/input/jraigs-dataset/justRAIGS/5', image_id + '.jpg')
    print(f"Checking {src_path}: Exists - {os.path.exists(src_path)}")

In [None]:
# Drop rows related to non-existent .jpg files (valid subset).
# One existence flag per row, in row order; a plain bool array keeps the
# filtering positional and also works when the subset is empty.
has_image_valid = np.array(
    [
        os.path.exists(
            os.path.join('/kaggle/input/jraigs-dataset/justRAIGS/5', image_id + '.jpg')
        )
        for image_id in X_valid['Eye ID']
    ],
    dtype=bool,
)
# Index labels of the rows being removed (kept for inspection).
rows_to_drop_valid = X_valid.index[~has_image_valid].tolist()

# Filter the targets with the very same mask: dropping rows from X_valid only
# would leave X_valid and y_valid with different lengths and row order.
y_valid = y_valid[has_image_valid].reset_index(drop=True)
X_valid = X_valid[has_image_valid].reset_index(drop=True)

In [None]:
# Same check after filtering the validation subset.
for image_id in X_valid['Eye ID'].head():  # Print the first few entries for debugging
    src_path = os.path.join('/kaggle/input/jraigs-dataset/justRAIGS/5', image_id + '.jpg')
    print(f"Checking {src_path}: Exists - {os.path.exists(src_path)}")

In [None]:
# Sanity check before filtering: does '<Eye ID>.jpg' exist for the first test rows?
for image_id in X_test['Eye ID'].head():  # Print the first few entries for debugging
    src_path = os.path.join('/kaggle/input/jraigs-dataset/justRAIGS/5', image_id + '.jpg')
    print(f"Checking {src_path}: Exists - {os.path.exists(src_path)}")

In [None]:
# Drop rows related to non-existent .jpg files (test subset).
# One existence flag per row, in row order; a plain bool array keeps the
# filtering positional and also works when the subset is empty.
has_image_test = np.array(
    [
        os.path.exists(
            os.path.join('/kaggle/input/jraigs-dataset/justRAIGS/5', image_id + '.jpg')
        )
        for image_id in X_test['Eye ID']
    ],
    dtype=bool,
)
# Index labels of the rows being removed (kept for inspection).
rows_to_drop_test = X_test.index[~has_image_test].tolist()

# Filter the targets with the very same mask: dropping rows from X_test only
# would leave X_test and y_test with different lengths and row order.
y_test = y_test[has_image_test].reset_index(drop=True)
X_test = X_test[has_image_test].reset_index(drop=True)

In [None]:
# Same check after filtering the test subset.
for image_id in X_test['Eye ID'].head():  # Print the first few entries for debugging
    src_path = os.path.join('/kaggle/input/jraigs-dataset/justRAIGS/5', image_id + '.jpg')
    print(f"Checking {src_path}: Exists - {os.path.exists(src_path)}")

### Image Data Splitting

In [None]:
# Create the three per-split folders (no error if they already exist),
# then report whether each one is present.
split_directories = {
    'Train': '/path/to/train',
    'Valid': '/path/to/valid',
    'Test': '/path/to/test',
}

for split_path in split_directories.values():
    os.makedirs(split_path, exist_ok=True)

for split_name, split_path in split_directories.items():
    print(f"{split_name} directory: Exists - {os.path.exists(split_path)}")


In [None]:
def create_symlinks(image_ids, source_dir, dest_dir):
    """Link ``<source_dir>/<id>.jpg`` into ``dest_dir`` for every given id.

    Args:
        image_ids: Iterable of image id strings (file names without extension).
        source_dir: Directory that holds the original ``.jpg`` files.
        dest_dir: Directory that receives the symlinks; created if missing.

    Entries that already exist in ``dest_dir`` are left untouched, so the
    function can be called repeatedly without raising ``FileExistsError``.
    """
    os.makedirs(dest_dir, exist_ok=True)
    for image_id in image_ids:
        src_path = os.path.join(source_dir, image_id + '.jpg')
        dst_path = os.path.join(dest_dir, image_id + '.jpg')
        # lexists() is also True for a dangling symlink, for which exists()
        # reports False and os.symlink() would then fail on a re-run.
        if not os.path.lexists(dst_path):  # Avoid overwriting if the symlink already exists
            os.symlink(src_path, dst_path)


In [None]:

# Link the images of each split into its own folder.
for split_ids, split_dir in (
    (X_train['Eye ID'], '/path/to/train'),
    (X_valid['Eye ID'], '/path/to/valid'),
    (X_test['Eye ID'], '/path/to/test'),
):
    create_symlinks(split_ids, '/kaggle/input/jraigs-dataset/justRAIGS/5', split_dir)


In [None]:
# NOTE(review): these paths differ from the folders the symlinks were created
# in ('/path/to/train', '/path/to/valid', '/path/to/test') — confirm that the
# '*_symlinks' folders are created before they are used.
train_symlinks_dir, valid_symlinks_dir, test_symlinks_dir = (
    '/path/to/train_symlinks',
    '/path/to/valid_symlinks',
    '/path/to/test_symlinks',
)

# Model Training

### With Image Data

In [None]:
# Xception with pretrained ImageNet weights and its classification head
# included (`include_top=True`, 1000 classes, softmax output).
# NOTE(review): with these settings the model predicts the 1000 ImageNet
# classes; presumably a task-specific head for RG/NRG is attached later —
# TODO confirm.
model = keras.applications.Xception(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
)

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
model.summary()
