In [13]:
# Import necessary libraries
import pandas as pd  # For handling data in DataFrame format
from sklearn.model_selection import train_test_split  # To split data into training and testing sets
from sklearn.preprocessing import StandardScaler  # For normalizing numerical data
from sklearn.linear_model import LogisticRegression  # Logistic Regression model for classification
from sklearn.metrics import accuracy_score  # To evaluate model performance
from google.colab import files  # To allow file upload in Google Colab

# Prompt user to upload a CSV file in Google Colab
print("Please upload your CSV file")
uploaded = files.upload()  # Opens file upload dialog
csv_filename = list(uploaded.keys())[0]  # Get uploaded file name
df = pd.read_csv(csv_filename)  # Load dataset into a pandas DataFrame

# Check and remove missing values (if any)
df.dropna(inplace=True)  # Drops any rows with missing values to ensure clean data

# Convert 'CreditScore' into a binary category: 'Good' (1) or 'Bad' (0)
df['CreditCategory'] = df['CreditScore'].apply(lambda x: 1 if x > 600 else 0)

# Display the distribution of 'Good' vs 'Bad' credit scores
print("Class distribution:\n", df['CreditCategory'].value_counts())

# Select independent variables (features) and dependent variable (target)
X = df[['Age', 'Income', 'LoanAmount']]  # Features used for prediction
y = df['CreditCategory']  # Target variable (1 for good credit, 0 for bad credit)

# Split the dataset into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature values to have mean 0 and variance 1 (better model performance)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # Fit and transform training data
X_test = scaler.transform(X_test)  # Transform test data using the same scaler

# Train the Logistic Regression model with balanced class weights (to handle imbalanced data)
model = LogisticRegression(class_weight='balanced')  # Adjusts for class imbalance
model.fit(X_train, y_train)  # Train the model on training data

# Make predictions using the trained model on the test dataset
y_pred = model.predict(X_test)

# Calculate and print the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')  # Display accuracy as a percentage (e.g., 85%)


Please upload your CSV file


Saving credit_data.csv to credit_data (11).csv
Class distribution:
 CreditCategory
1    12
0     8
Name: count, dtype: int64
Accuracy: 0.75
