In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# ---------------------------------------------------------------------------
# 1. Data loading
# ---------------------------------------------------------------------------
# Read the credit dataset from the working directory and preview its first
# rows so the column layout can be checked by eye.
data = pd.read_csv('credit_data.csv')
print(data.head())

# ---------------------------------------------------------------------------
# 2. Feature / target selection
# ---------------------------------------------------------------------------
# The predictor columns are kept in one list so the training frame and any
# later prediction frame use the same names in the same order.
feature_columns = ['Age', 'Income', 'LoanAmount']
target_column = 'CreditScore'

X = data[feature_columns]
y = data[target_column]

# ---------------------------------------------------------------------------
# 3. Train / test split
# ---------------------------------------------------------------------------
# 20% of the rows are held out for evaluation; the fixed random_state makes
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ---------------------------------------------------------------------------
# 4. Model fitting
# ---------------------------------------------------------------------------
# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# ---------------------------------------------------------------------------
# 5. Evaluation on the held-out rows
# ---------------------------------------------------------------------------
y_pred = model.predict(X_test)

# MSE: average squared difference between predicted and actual values.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
# R2: share of the target's variance explained by the model. It can be
# negative when the model does worse than always predicting the mean.
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse}", f"R-squared: {r2}", sep="\n")

# ---------------------------------------------------------------------------
# 6. Single-customer prediction
# ---------------------------------------------------------------------------
# Placeholder customer; swap in real values before using the prediction.
new_data = pd.DataFrame(
    {
        'Age': [30],            # customer age
        'Income': [50000],      # annual income
        'LoanAmount': [20000],  # requested loan amount
    }
)

predicted_score = model.predict(new_data)
print(f"Predicted Credit Score: {predicted_score[0]}")

   CustomerID  Age  Income  LoanAmount  CreditScore
0           1   34   63961       13576          531
1           2   50   34158       19395          323
2           3   67   92702       37663          416
3           4   26   84846       44572          747
4           5   58   40787       25598          805
Mean Squared Error: 77039.2842810213
R-squared: -9.44644729434587
Predicted Credit Score: 651.0666706564242
