In [4]:
import os

# Location of the service-account JSON key used to authenticate to BigQuery.
# A GOOGLE_APPLICATION_CREDENTIALS value that is already exported takes
# precedence, so the notebook can run on another machine without editing this
# cell; the literal path is only the original local fallback.
credentials_path = (
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    or "/Users/amir/Desktop/Caixa Bank/caixa-bank-6c4f815e89ac.json"
)

# Fail fast with a clear message. isfile() also rejects a directory path,
# which exists() would have accepted.
if not os.path.isfile(credentials_path):
    raise FileNotFoundError(f"Credentials file not found: {credentials_path}")

# Set the environment variable for this kernel process.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

# NOTE: do NOT restart the kernel after this cell. os.environ changes live only
# in the current process, so a restart would discard the value set here.
# Run this cell before creating the BigQuery client.

In [5]:
from google.cloud import  bigquery
import pandas as pd

In [6]:
# Create the BigQuery client. No credentials are passed explicitly; the
# GOOGLE_APPLICATION_CREDENTIALS variable set earlier in the notebook is
# expected to supply them.
client = bigquery.Client()

In [7]:
# SQL that pulls every column and row of the `risk_table` table
# (project `caixa-bank`, dataset `ML`).
query = " SELECT * FROM `caixa-bank.ML.risk_table`"

In [8]:
# Submit the SQL to BigQuery, then download the result set as a pandas
# DataFrame.
query_job = client.query(query)
risk_df = query_job.to_dataframe()



In [10]:
pip install google-cloud-bigquery-storage


Collecting google-cloud-bigquery-storage
  Downloading google_cloud_bigquery_storage-2.27.0-py2.py3-none-any.whl.metadata (5.6 kB)
Collecting protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2 (from google-cloud-bigquery-storage)
  Using cached protobuf-5.29.0-cp38-abi3-macosx_10_9_universal2.whl.metadata (592 bytes)
Downloading google_cloud_bigquery_storage-2.27.0-py2.py3-none-any.whl (240 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hUsing cached protobuf-5.29.0-cp38-abi3-macosx_10_9_universal2.whl (417 kB)
Installing collected packages: protobuf, google-cloud-bigquery-storage
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3
    Uninstalling protobuf-3.20.3:
      Successfully uninstalled protobuf-3.20.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are i

In [9]:
# Preview the first rows of the freshly loaded table to sanity-check its columns.
risk_df.head()

Unnamed: 0,customer_id,current_age,yearly_income,total_debt,credit_score,per_capita_income,retirement_age,gender,num_credit_cards,fraud_count,risk_category
0,600,18,46878,60121,578,22991,55,Male,1,0,Low Risk
1,1716,18,25654,55761,746,12583,65,Female,1,0,Low Risk
2,1134,18,25847,46379,568,12677,59,Female,1,0,Low Risk
3,1093,18,47543,124177,667,23316,70,Male,1,0,Low Risk
4,1906,18,28950,61297,609,14199,71,Male,1,0,Low Risk


In [None]:
# --- Feature engineering ------------------------------------------------------
# Every column below is computed only from raw columns of `risk_df`, so this
# cell can be re-run without changing its result.
# NOTE(review): a zero in `yearly_income` or `num_credit_cards` would make the
# corresponding ratio non-finite; confirm the table has no such rows before
# these columns are fed to a model.

# Debt relative to income. Customers with higher debt-to-income ratios are
# generally riskier.
risk_df['debt_to_income_ratio'] = risk_df['total_debt'] / risk_df['yearly_income']

# Total debt divided by (number of cards x credit score); higher is treated as
# riskier.
# NOTE(review): in the previewed rows this value is on the order of 100, not
# "close to 1", so it is not a conventional utilisation ratio; interpret with
# care.
risk_df['credit_utilization'] = risk_df['total_debt'] / (
    risk_df['num_credit_cards'] * risk_df['credit_score']
)

# Fraction of the retirement age still ahead of the customer. Younger customers
# may behave differently than those nearing retirement.
risk_df['age_to_retirement_ratio'] = (
    risk_df['retirement_age'] - risk_df['current_age']
) / risk_df['retirement_age']

# 1 when at least one fraud is recorded for the customer, otherwise 0.
# Vectorised comparison instead of a per-row Python lambda.
risk_df['has_fraud_history'] = (risk_df['fraud_count'] > 0).astype(int)

# Interaction term: product of total debt and credit score.
risk_df['debt_credit_interaction'] = risk_df['total_debt'] * risk_df['credit_score']




In [None]:
# Preview the frame again to confirm the engineered columns are present.
risk_df.head()


In [None]:
# One-hot encode `gender` into `gender_<value>` columns appended at the end of
# the frame, replacing the original text column.
# The guard makes the cell safe to re-run: once `gender` has been dropped, a
# second execution is a no-op instead of raising KeyError.
if 'gender' in risk_df.columns:
    gender_encoded = pd.get_dummies(risk_df['gender'], prefix='gender')
    risk_df = pd.concat([risk_df.drop(columns=['gender']), gender_encoded], axis=1)




In [None]:
# Preview the frame to confirm `gender` was replaced by its one-hot columns.
risk_df.head()


In [None]:
from sklearn.model_selection import train_test_split

# Model inputs. The column ORDER matters: the scaler and the model are fitted on
# exactly this layout, so any inference code must supply the same features in
# the same order.
FEATURE_COLUMNS = [
    'current_age', 'yearly_income', 'total_debt', 'credit_score', 'per_capita_income',
    'debt_to_income_ratio', 'age_to_retirement_ratio', 'has_fraud_history',
    'gender_Male', 'gender_Female',
]

# Integer encoding of the target classes.
RISK_LABELS = {'Low Risk': 0, 'Medium Risk': 1, 'High Risk': 2}

# Define features and target
X = risk_df[FEATURE_COLUMNS]
y = risk_df['risk_category'].map(RISK_LABELS)

# .map() turns any category missing from RISK_LABELS into NaN without warning.
# Stop here with the offending values rather than carrying NaN labels into
# training.
unmapped_labels = risk_df.loc[y.isna(), 'risk_category'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f"Unexpected risk_category values: {list(unmapped_labels)}")

# Train-test split: 80% train / 20% test, fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
from sklearn.linear_model import LogisticRegression

# Initialize the logistic regression model.
# `multi_class` is deliberately not passed: it is deprecated since
# scikit-learn 1.5 and scheduled for removal. With the lbfgs solver and the
# three-class target used here, the default already fits a multinomial model.
model = LogisticRegression(solver='lbfgs', max_iter=1000)

# Train the model on the standardised training features.
model.fit(X_train_scaled, y_train)


In [None]:
# Predict an encoded risk class (0 = Low, 1 = Medium, 2 = High, as mapped when
# the target was built) for every row of the scaled test split.
y_pred = model.predict(X_test_scaled)


In [None]:
from sklearn.metrics import accuracy_score

# Share of held-out rows whose predicted class matches the true class,
# reported to four decimal places.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.4f}")


In [None]:
import joblib

# Persist the two fitted objects inference needs, the classifier and the scaler
# it was trained with, into the current working directory.
for artifact, filename in ((model, 'logistic_model.pkl'), (scaler, 'scaler.pkl')):
    joblib.dump(artifact, filename)

print("Model and scaler saved successfully!")


In [None]:
# Round-trip check: read both artifacts back from disk, replacing the
# in-memory `model` and `scaler` with the deserialised copies.
model, scaler = joblib.load('logistic_model.pkl'), joblib.load('scaler.pkl')

print("Model and scaler loaded successfully!")


In [None]:
# Copy the artifacts to Google Drive when running on Google Colab.
# `google.colab` can only be imported inside Colab, so on any other kernel this
# step is skipped with a message instead of aborting a full "Run All".
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available; skipping the Google Drive copy.")
else:
    # Mount Google Drive
    drive.mount('/content/drive')

    # Save files to Drive
    joblib.dump(model, '/content/drive/My Drive/logistic_model.pkl')
    joblib.dump(scaler, '/content/drive/My Drive/scaler.pkl')


In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import joblib

# Load the model and scaler
model = joblib.load('logistic_model.pkl')
scaler = joblib.load('scaler.pkl')

st.title("Risk Prediction Application")

# Input fields
age = st.slider("Current Age", 18, 100, 30)
income = st.number_input("Yearly Income", min_value=0, value=50000)
debt = st.number_input("Total Debt", min_value=0, value=20000)
credit_score = st.slider("Credit Score", 300, 850, 700)
per_capita_income = st.number_input("Per Capita Income", min_value=0, value=20000)
gender = st.radio("Gender", ("Male", "Female"))

# Button to predict
if st.button("Predict Risk"):
    gender_values = [1, 0] if gender == "Male" else [0, 1]
    input_data = [[age, income, debt, credit_score, per_capita_income] + gender_values]
    scaled_data = scaler.transform(input_data)
    prediction = model.predict(scaled_data)[0]
    risk_category = ["Low Risk", "Medium Risk", "High Risk"][prediction]
    st.success(f"The predicted risk category is: {risk_category}")


In [None]:
# You can't directly view a Streamlit app inside Colab. Instead, run it as a
# background process and use a service like ngrok to expose it to the internet.
import subprocess

from pyngrok import ngrok

# Port Streamlit listens on and ngrok forwards to; both must agree.
STREAMLIT_PORT = 8501

# Start Streamlit with subprocess rather than `!streamlit run app.py &`: stock
# IPython kernels reject `!` commands that end in `&`, and Popen returns
# immediately while the server keeps running.
streamlit_process = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port", str(STREAMLIT_PORT)],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
)

# Pass the port positionally: the first parameter is the tunnel target in both
# older (`port`) and newer (`addr`) pyngrok releases, whereas the keyword
# `port=` is not the tunnel address in the newer ones.
tunnel = ngrok.connect(str(STREAMLIT_PORT))

# Newer pyngrok returns a tunnel object exposing `.public_url`; older releases
# return the URL string itself.
public_url = getattr(tunnel, "public_url", tunnel)
print(f"Streamlit app is live at: {public_url}")
