In [None]:
!pip install psycopg2-binary
!pip install boto3
!pip install mlflow==2.7.0

# Load data from postgre

In [1]:
import pandas as pd
import psycopg2 as pg

dataset_name = "train_cs"

engine = pg.connect("host=cassandra-postgresql.feast-db port=5432 dbname=FEAST_OFFLINE_STORE user=postgres password=postgres")
df = pd.read_sql(f'select * from {dataset_name}', con=engine)

In [2]:
df.head()

Unnamed: 0,Month,Age,Occupation,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Delay_from_due_date,...,Credit_Mix,Outstanding_Debt,Credit_Utilization_Ratio,Credit_History_Age,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Payment_Behaviour,Monthly_Balance,Credit_Score
0,2,23.0,4,19114.12,4205.50294,3,4,3,4.0,1,...,1,809.98,31.94496,17.961599,2,49.574949,118.280222,6.0,284.629162,2
1,3,116.302136,4,19114.12,4205.50294,3,4,3,4.0,3,...,1,809.98,28.609352,22.0,2,49.574949,81.699521,3.0,331.209863,2
2,4,23.0,4,19114.12,4205.50294,3,4,3,4.0,5,...,1,809.98,31.377862,22.0,2,49.574949,199.458074,1.0,223.45131,2
3,5,23.0,4,19114.12,1824.843333,3,4,3,4.0,6,...,1,809.98,24.797347,22.0,2,49.574949,41.420153,2.0,341.489231,2
4,7,23.0,4,19114.12,1824.843333,3,4,3,4.0,3,...,1,809.98,22.537593,22.0,2,49.574949,178.344067,1.0,244.565317,2


# Prepare data

In [3]:
train_data=df.drop("Credit_Score",axis=1)
label_data=df["Credit_Score"]

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(train_data, label_data ,test_size=0.2, random_state=42)

# Train Model

In [5]:
from sklearn.ensemble import GradientBoostingClassifier

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=3, random_state=0).fit(X_train, y_train)
score = clf.score(X_test, y_test)
score

0.7327347057964666

# Log Model

In [9]:
import mlflow

mlflow.set_experiment("Credit Score Classification")

with mlflow.start_run() as run:
    
    mlflow.log_metrics({"accuracy": score})
    
    mlflow.sklearn.log_model(
        sk_model=clf,
        artifact_path="model",
    )

### Next you need to register your model in mlflow

# Test Logged Model

In [10]:
import pandas as pd
import psycopg2 as pg

dataset_name = "test_cs"

engine = pg.connect("host=cassandra-postgresql.feast-db port=5432 dbname=FEAST_OFFLINE_STORE user=postgres password=postgres")
df_test = pd.read_sql(f'select * from {dataset_name}', con=engine)

In [11]:
df.head()

Unnamed: 0,Month,Age,Occupation,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Delay_from_due_date,...,Credit_Mix,Outstanding_Debt,Credit_Utilization_Ratio,Credit_History_Age,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Payment_Behaviour,Monthly_Balance,Credit_Score
0,2,23.0,4,19114.12,4205.50294,3,4,3,4.0,1,...,1,809.98,31.94496,17.961599,2,49.574949,118.280222,6.0,284.629162,2
1,3,116.302136,4,19114.12,4205.50294,3,4,3,4.0,3,...,1,809.98,28.609352,22.0,2,49.574949,81.699521,3.0,331.209863,2
2,4,23.0,4,19114.12,4205.50294,3,4,3,4.0,5,...,1,809.98,31.377862,22.0,2,49.574949,199.458074,1.0,223.45131,2
3,5,23.0,4,19114.12,1824.843333,3,4,3,4.0,6,...,1,809.98,24.797347,22.0,2,49.574949,41.420153,2.0,341.489231,2
4,7,23.0,4,19114.12,1824.843333,3,4,3,4.0,3,...,1,809.98,22.537593,22.0,2,49.574949,178.344067,1.0,244.565317,2


In [12]:
import mlflow

model_name = "csgb"
stage = "Production"

model = mlflow.sklearn.load_model(f"models:/{model_name}/{stage}")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [13]:
model.predict(df_test)

array([2, 2, 2, ..., 1, 1, 0])