In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
df = pd.read_csv('credit_card_users_segmented.csv')

# SEGMENT holds text labels, which the regressors below cannot consume directly
df = df.drop(columns=['SEGMENT'])

# A CSV that was written together with its row index reloads with an
# "Unnamed: N" column. Such a column is a row counter, not a property of the
# user, so keep it out of the feature matrix. This is a no-op when no such
# column exists.
# NOTE(review): the frame preview in this notebook shows an "Unnamed: 0"
# header - confirm against the CSV whether that is a real column.
index_like_columns = [col for col in df.columns if str(col).startswith('Unnamed')]
df = df.drop(columns=index_like_columns)

# Define features (X) and target (y): predict PURCHASES from every other column
X = df.drop(columns=['PURCHASES'])
y = df['PURCHASES']

# Split data into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


def evaluate_regressor(name, model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and report its test-set metrics.

    Prints the model name followed by the R2 score and the mean squared
    error computed on the test split.

    Parameters
    ----------
    name : str
        Heading printed above the metrics.
    model : estimator
        Unfitted regressor exposing ``fit`` and ``predict``; it is fitted
        in place.
    X_train, X_test, y_train, y_test
        The train/test split to fit and score on.

    Returns
    -------
    The predictions made for ``X_test``.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"{name}:")
    print("R2 Score:", r2_score(y_test, y_pred))
    print("MSE:", mean_squared_error(y_test, y_pred))
    return y_pred


# Reading the scores: an R2 of 0 is what a model that always predicts the mean
# of the test targets would get, so a value at or below 0 means the features
# are not helping. The run recorded with this notebook scored all three models
# below 0.

### 1. Linear Regression
lr = LinearRegression()
y_pred_lr = evaluate_regressor("Linear Regression", lr, X_train, X_test, y_train, y_test)
print()

### 2. Decision Tree Regressor
dt = DecisionTreeRegressor(random_state=42)
y_pred_dt = evaluate_regressor("Decision Tree Regressor", dt, X_train, X_test, y_train, y_test)
print()

### 3. Random Forest Regressor
rf = RandomForestRegressor(n_estimators=100, random_state=42)
y_pred_rf = evaluate_regressor("Random Forest Regressor", rf, X_train, X_test, y_train, y_test)


Linear Regression:
R2 Score: -0.004166659088197688
MSE: 8138352.619467336

Decision Tree Regressor:
R2 Score: -1.0700511348170454
MSE: 16776902.44243605

Random Forest Regressor:
R2 Score: -0.030322873275518525
MSE: 8350337.843553566


In [19]:
# Re-read the CSV from disk into `df` and show the frame as this cell's output
df = pd.read_csv(filepath_or_buffer='credit_card_users_segmented.csv')
df

Unnamed: 0,BALANCE,BALANCE_FREQUENCY,PURCHASES,ONEOFF_PURCHASES,INSTALLMENTS_PURCHASES,CASH_ADVANCE,PURCHASES_FREQUENCY,ONEOFF_PURCHASES_FREQUENCY,PURCHASES_INSTALLMENTS_FREQUENCY,CASH_ADVANCE_FREQUENCY,CASH_ADVANCE_TRX,PURCHASES_TRX,CREDIT_LIMIT,PAYMENTS,MINIMUM_PAYMENTS,PRC_FULL_PAYMENT,TENURE,SEGMENT
0,1872.70,0.37,7299.98,6381.45,2989.12,6777.89,0.74,0.04,0.82,0.88,1,28,14206.27,8589.75,3249.43,0.68,7,Cash Advance User
1,4753.57,0.33,1845.12,4592.92,948.18,3956.14,0.88,0.83,0.15,0.91,7,96,7839.78,3571.16,1272.64,0.61,12,Moderate User
2,3659.97,0.18,3466.40,9644.99,1263.59,1563.72,0.46,0.25,0.95,0.38,29,35,13430.59,1613.26,1064.46,0.45,6,One-off Purchaser
3,2993.29,0.61,6632.81,2189.78,1806.71,5893.13,0.29,0.28,0.84,0.23,14,70,4284.78,3686.45,4684.41,0.93,11,Cash Advance User
4,780.09,0.48,4820.89,5878.56,2036.53,3349.43,0.32,0.23,0.92,0.19,24,55,11815.81,13715.55,4792.87,0.55,9,Moderate User
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,4288.28,0.88,9522.75,7540.34,6224.45,7729.13,0.59,0.32,0.91,0.79,13,89,17935.01,9197.71,1772.60,0.36,9,High Spender
9996,4487.54,0.05,7478.24,7645.27,2261.94,2985.92,0.95,0.37,0.04,0.78,6,81,4435.08,8924.30,3269.19,0.26,8,Moderate User
9997,4733.54,0.30,194.56,2695.69,7489.95,2437.40,0.92,0.38,0.14,0.67,4,45,14442.96,3634.99,395.50,0.76,10,Moderate User
9998,1987.44,0.44,4010.05,4343.20,5210.11,3258.90,0.20,0.13,0.26,0.50,19,2,14161.82,1195.57,2445.76,0.74,9,Moderate User


In [8]:
pip install gradio scikit-learn pandas


Note: you may need to restart the kernel to use updated packages.


In [13]:
import pandas as pd
import gradio as gr
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Load and prepare data
df = pd.read_csv("credit_card_users_segmented.csv")
df = df.drop(columns=["SEGMENT"])

# Features and Target: the five inputs exposed in the UI, predicting PURCHASES
X = df[["BALANCE", "CASH_ADVANCE", "CREDIT_LIMIT", "PAYMENTS", "TENURE"]]
y = df["PURCHASES"]

# Train model
# Only the training split is fitted here; X_test / y_test are not used in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# Seed the forest as well as the split, so re-running the cell rebuilds the
# same model and the app returns the same prediction for the same inputs.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Prediction function
def predict(balance, cash_advance, credit_limit, payments, tenure):
    """Predict PURCHASES for a single user with the module-level ``model``.

    Parameters
    ----------
    balance, cash_advance, credit_limit, payments, tenure : number
        Feature values, in the same order as the columns the model is
        trained on.

    Returns
    -------
    float
        The model's prediction rounded to two decimal places.

    Raises
    ------
    ValueError
        If any input is ``None``.
    """
    values = [balance, cash_advance, credit_limit, payments, tenure]
    if any(value is None for value in values):
        raise ValueError("All five inputs are required to make a prediction.")

    # The model is fitted on a DataFrame, so query it with a frame carrying the
    # same column names in the same order. A bare nested list makes scikit-learn
    # warn about missing feature names and leaves the ordering unchecked.
    # Keep this list in sync with the columns selected for training.
    feature_names = ["BALANCE", "CASH_ADVANCE", "CREDIT_LIMIT", "PAYMENTS", "TENURE"]
    input_data = pd.DataFrame([values], columns=feature_names)

    result = model.predict(input_data)
    # Convert to a built-in float so the UI receives a plain Python number.
    return round(float(result[0]), 2)

# Gradio UI
# One input widget per predict() argument, listed in the same order as its
# parameters; tenure is a slider over 6..12 in steps of 1.
input_widgets = [
    gr.Number(label="Balance"),
    gr.Number(label="Cash Advance"),
    gr.Number(label="Credit Limit"),
    gr.Number(label="Payments"),
    gr.Slider(6, 12, step=1, label="Tenure"),
]
prediction_output = gr.Number(label="Predicted Purchases")

# Wire the widgets to predict() and start the app.
demo = gr.Interface(
    fn=predict,
    inputs=input_widgets,
    outputs=prediction_output,
    title="Simple Credit Card Purchases Predictor",
)
demo.launch()


* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.


