# Artificial Intelligence and Predictions
### Case: Customer Credit Score
You have been hired by a bank to determine the credit score of its customers. You need to analyze all the bank's customers and, based on this analysis, create a model that can read customer information and automatically determine their credit score: Poor, Standard, Good

In [1]:
# Imports
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
table = pd.read_csv("clients.csv") # Imports the database

display(table) # Displays the table

Unnamed: 0,client_id,month,age,occupation,annual_salary,accounts_num,cards_num,loan_interest,loans_num,days_late,...,credit_history_age,monthly_investment,payment_behavior,final_month_balance,credit_score,car_loan,home_loan,personal_loan,credit_loan,student_loan
0,3392,1,23.0,scientist,19114.12,3.0,4.0,3.0,4.0,3.0,...,265.0,21.465380,high_spending_low_payment,312.494089,Good,1,1,1,1,0
1,3392,2,23.0,scientist,19114.12,3.0,4.0,3.0,4.0,3.0,...,266.0,21.465380,low_spending_high_payment,284.629162,Good,1,1,1,1,0
2,3392,3,23.0,scientist,19114.12,3.0,4.0,3.0,4.0,3.0,...,267.0,21.465380,low_spending_medium_payment,331.209863,Good,1,1,1,1,0
3,3392,4,23.0,scientist,19114.12,3.0,4.0,3.0,4.0,5.0,...,268.0,21.465380,low_spending_low_payment,223.451310,Good,1,1,1,1,0
4,3392,5,23.0,scientist,19114.12,3.0,4.0,3.0,4.0,6.0,...,269.0,21.465380,high_spending_medium_payment,341.489231,Good,1,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,37932,4,25.0,mechanic,39628.99,4.0,6.0,7.0,2.0,23.0,...,378.0,24.028477,high_spending_high_payment,479.866228,Poor,1,0,0,0,1
99996,37932,5,25.0,mechanic,39628.99,4.0,6.0,7.0,2.0,18.0,...,379.0,24.028477,high_spending_medium_payment,496.651610,Poor,1,0,0,0,1
99997,37932,6,25.0,mechanic,39628.99,4.0,6.0,7.0,2.0,27.0,...,380.0,24.028477,high_spending_high_payment,516.809083,Poor,1,0,0,0,1
99998,37932,7,25.0,mechanic,39628.99,4.0,6.0,7.0,2.0,20.0,...,381.0,24.028477,low_spending_high_payment,319.164979,Standard,1,0,0,0,1


In [3]:
encoder = LabelEncoder() # Create an instance of the LabelEncoder class to transform string elements into integer elements

# Encodes the string columns
table["occupation"] = encoder.fit_transform(table["occupation"])
table["credit_mix"] = encoder.fit_transform(table["credit_mix"])
table["payment_behavior"] = encoder.fit_transform(table["payment_behavior"])

display(table) # Displays the encoded table

Unnamed: 0,client_id,month,age,occupation,annual_salary,accounts_num,cards_num,loan_interest,loans_num,days_late,...,credit_history_age,monthly_investment,payment_behavior,final_month_balance,credit_score,car_loan,home_loan,personal_loan,credit_loan,student_loan
0,3392,1,23.0,13,19114.12,3.0,4.0,3.0,4.0,3.0,...,265.0,21.465380,1,312.494089,Good,1,1,1,1,0
1,3392,2,23.0,13,19114.12,3.0,4.0,3.0,4.0,3.0,...,266.0,21.465380,3,284.629162,Good,1,1,1,1,0
2,3392,3,23.0,13,19114.12,3.0,4.0,3.0,4.0,3.0,...,267.0,21.465380,5,331.209863,Good,1,1,1,1,0
3,3392,4,23.0,13,19114.12,3.0,4.0,3.0,4.0,5.0,...,268.0,21.465380,4,223.451310,Good,1,1,1,1,0
4,3392,5,23.0,13,19114.12,3.0,4.0,3.0,4.0,6.0,...,269.0,21.465380,2,341.489231,Good,1,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,37932,4,25.0,9,39628.99,4.0,6.0,7.0,2.0,23.0,...,378.0,24.028477,0,479.866228,Poor,1,0,0,0,1
99996,37932,5,25.0,9,39628.99,4.0,6.0,7.0,2.0,18.0,...,379.0,24.028477,2,496.651610,Poor,1,0,0,0,1
99997,37932,6,25.0,9,39628.99,4.0,6.0,7.0,2.0,27.0,...,380.0,24.028477,0,516.809083,Poor,1,0,0,0,1
99998,37932,7,25.0,9,39628.99,4.0,6.0,7.0,2.0,20.0,...,381.0,24.028477,3,319.164979,Standard,1,0,0,0,1


In [4]:
x = table.drop(columns=["client_id", "credit_score"]) # Defines x as the table without the "client_id" column (unnecessary for prediction) and "credit_score" column (column to be predicted)

y = table["credit_score"] # Defines y as the column to be predicted

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3) # Splits the table into a training set and a test set for x and y

In [5]:
decisiontree_model = RandomForestClassifier() # Creates an instance of the RandomForestClassifier class to build a decision tree model artificial intelligence

knn_model = KNeighborsClassifier() # Creates an instance of the KNeighborsClassifier class to build a k-nearest neighbors model artificial intelligence

decisiontree_model.fit(x_train, y_train) # Training the RandomForestClassifier model on the training dataset

knn_model.fit(x_train, y_train) # Training the KNeighborsClassifier model on the training dataset

In [6]:
decisiontree_prediction = decisiontree_model.predict(x_test) # Predict the credit scores using the RandomForestClassifier model

knn_prediction = knn_model.predict(x_test) # Predict the credit scores using the KNeighborsClassifier model

print("{:.1%}".format(accuracy_score(y_test, decisiontree_prediction))) # Display the accuracy score of the RandomForestClassifier model
                                                                        #predictions with the actual credit scores in the test set (y_test)

print("{:.1%}".format(accuracy_score(y_test, knn_prediction))) # Display the accuracy score of the KNeighborsClassifier model
                                                                        #predictions with the actual credit scores in the test set (y_test)

82.7%
73.7%


# Analysis of each AI model
#### Decision Tree Model
This model achieved an accuracy of approximately 82%
#### KNN Model
This model achieved an accuracy of approximately 73%
## Conclusion
Since the Decision Tree model achieved a higher accuracy, it will be the model used

In [7]:
new_clients = pd.read_csv("new_clients.csv") # Imports the new_clients database

# Encodes the string columns of the new_clients database
new_clients["occupation"] = encoder.fit_transform(new_clients["occupation"])
new_clients["credit_mix"] = encoder.fit_transform(new_clients["credit_mix"])
new_clients["payment_behavior"] = encoder.fit_transform(new_clients["payment_behavior"])

prediction = decisiontree_model.predict(new_clients) # Predicts the credit scores for the new clients using the trained decision tree model

print(prediction) # Displays the predictions for the credit scores of the new clients

['Poor' 'Good' 'Standard']


# Final Conclusion
##### After analyzing the performance of the two AI models, it was determined that the Decision Tree model achieved a higher accuracy of approximately 82% compared to the KNN model, which achieved an accuracy of approximately 73%.
##### Therefore, the Decision Tree model was used for predicting the credit scores of the new clients.