In [75]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader,Dataset

In [63]:
# Never truncate wide frames: show every column when a DataFrame is rendered.
pd.options.display.max_columns = None

In [64]:
# Load the training table. The first CSV row supplies the column names and the
# first CSV column is used as the row index.
df = pd.read_csv('loan_train.csv', index_col=0, header=0)
df.head(5)

Unnamed: 0_level_0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length,loan_status
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,37,35000,RENT,0.0,EDUCATION,B,6000,11.49,0.17,N,14,0
1,22,56000,OWN,6.0,MEDICAL,C,4000,13.35,0.07,N,2,0
2,29,28800,OWN,8.0,PERSONAL,A,6000,8.9,0.21,N,10,0
3,30,70000,RENT,14.0,VENTURE,B,12000,11.11,0.17,N,5,0
4,22,60000,RENT,2.0,MEDICAL,A,6000,6.92,0.1,N,3,0


In [65]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(58645, 12)

In [66]:
# Per-column dtypes: `object` columns hold strings and need encoding before
# they can be fed to a model; the rest are already numeric.
df.dtypes

person_age                      int64
person_income                   int64
person_home_ownership          object
person_emp_length             float64
loan_intent                    object
loan_grade                     object
loan_amnt                       int64
loan_int_rate                 float64
loan_percent_income           float64
cb_person_default_on_file      object
cb_person_cred_hist_length      int64
loan_status                     int64
dtype: object

In [67]:
# Count missing entries in each column.
df.isnull().sum()

person_age                    0
person_income                 0
person_home_ownership         0
person_emp_length             0
loan_intent                   0
loan_grade                    0
loan_amnt                     0
loan_int_rate                 0
loan_percent_income           0
cb_person_default_on_file     0
cb_person_cred_hist_length    0
loan_status                   0
dtype: int64

In [68]:
# Class balance of the target column.
df.loc[:, 'loan_status'].value_counts()

loan_status
0    50295
1     8350
Name: count, dtype: int64

In [69]:
# Separate the predictors from the target by name rather than by position, so
# the split does not depend on `loan_status` being the last column.
# `drop` returns a new frame (not a slice of `df`), so later column
# assignments on `x` cannot raise SettingWithCopyWarning or write through to `df`.
TARGET_COL = 'loan_status'
x, y = df.drop(columns=TARGET_COL), df[TARGET_COL]

In [70]:
# Partition the feature columns by dtype: string-valued (object) columns are
# treated as categorical, 64-bit integer/float columns as numerical.
categorical_cols = x.select_dtypes(include=['object']).columns
numerical_cols = x.select_dtypes(include=['int64', 'float64']).columns

In [71]:
# Keep the fitted scaler in a variable: held-out or inference data must be
# transformed with these same statistics, which is impossible if the scaler
# object is discarded right after fitting.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so test-set statistics leak into the training features.
# Consider splitting first and fitting on the training portion only.
scaler = StandardScaler()
x = x.copy()  # independent frame, so the assignment below never targets a view of `df`
x[numerical_cols] = scaler.fit_transform(x[numerical_cols])

In [72]:
# One-hot encode every categorical feature in a single call. The remaining
# columns keep their order and the 0/1 indicator columns are appended after
# them, named `<column>_<category>`.
x = pd.get_dummies(
    x,
    columns=list(categorical_cols),
    prefix_sep='_',
    drop_first=False,
    dtype=int,
)

In [73]:
# Preview the first five rows of the scaled and encoded feature matrix.
x.iloc[:5]

Unnamed: 0_level_0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_MORTGAGE,person_home_ownership_OTHER,person_home_ownership_OWN,person_home_ownership_RENT,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE,loan_grade_A,loan_grade_B,loan_grade_C,loan_grade_D,loan_grade_E,loan_grade_F,loan_grade_G,cb_person_default_on_file_N,cb_person_default_on_file_Y
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
0,1.5662,-0.765768,-1.1872,-0.578306,0.267616,0.117378,2.031798,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0
1,-0.920057,-0.212128,0.328047,-0.937775,0.880532,-0.973242,-0.946489,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0
2,0.240196,-0.929223,0.83313,-0.578306,-0.585854,0.553626,1.039036,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0
3,0.405947,0.156966,2.348377,0.500101,0.142396,0.117378,-0.201917,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0
4,-0.920057,-0.106673,-0.682117,-0.578306,-1.238314,-0.646056,-0.698298,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0


In [74]:
# Preview the first five target values.
y.iloc[:5]

id
0    0
1    0
2    0
3    0
4    0
Name: loan_status, dtype: int64

In [76]:
# Fix the seed so the split (and every metric computed from it) is reproducible
# across kernel restarts, and stratify on the target because the classes are
# imbalanced (50,295 negatives vs. 8,350 positives).
SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=SEED, stratify=y
)

In [None]:
class MyDataset(Dataset):
    """Tabular dataset serving ``(features, label)`` pairs from a DataFrame.

    Every column of ``dataframe`` except ``label_col`` is treated as a numeric
    feature. The whole frame is converted to tensors once at construction
    time, so ``__getitem__`` is a plain tensor index instead of a per-row
    ``DataFrame.iloc`` lookup.

    Args:
        dataframe: Frame holding the numeric feature columns plus the label
            column. Non-numeric feature columns must be encoded beforehand.
        transform: Optional callable applied to the 1-D feature tensor of a
            sample before it is returned.
        label_col: Name of the column holding the target. Defaults to
            ``'label'``.
        label_dtype: Torch dtype of the returned label. The default
            ``torch.float32`` suits ``nn.BCEWithLogitsLoss``; pass
            ``torch.long`` for ``nn.CrossEntropyLoss``.

    Raises:
        KeyError: If ``label_col`` is not a column of ``dataframe``.
        ValueError: If a feature column cannot be converted to float32.
    """

    def __init__(self, dataframe, transform=None, label_col='label',
                 label_dtype=torch.float32):
        if label_col not in dataframe.columns:
            raise KeyError(f"label column {label_col!r} not found in dataframe")

        self.dataframe = dataframe
        self.transform = transform

        feature_frame = dataframe.drop(columns=[label_col])
        self.feature_names = list(feature_frame.columns)
        # Materialise the tensors once; indexing a DataFrame row by row inside
        # __getitem__ is orders of magnitude slower than indexing a tensor.
        self.features = torch.tensor(feature_frame.to_numpy(dtype='float32'))
        self.labels = torch.tensor(dataframe[label_col].to_numpy(), dtype=label_dtype)

    def __len__(self):
        """Return the number of samples (rows) in the dataset."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        features = self.features[idx]

        # Apply the optional per-sample transformation to the features only.
        if self.transform is not None:
            features = self.transform(features)

        return features, self.labels[idx]
    
# MyDataset looks up the target under a 'label' column, so attach y_train to
# the scaled and one-hot encoded training features under that name. The raw
# `df` still holds string columns and cannot be used as tensor input.
train_frame = x_train.assign(label=y_train)
dataset = MyDataset(dataframe=train_frame, transform=None)
train_loader = DataLoader(dataset, batch_size=64, shuffle=True)