In [1]:
# standard library
import os
from getpass import getpass

# EDA
import pandas as pd
import numpy as np

# oracle sql
import cx_Oracle

## date handling
from datetime import datetime, timedelta
from pytz import timezone


# visualizing
import seaborn as sns
import matplotlib.pyplot as plt

# modelling
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.ensemble import RandomForestRegressor

# accuracy metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# scaling
from sklearn.preprocessing import StandardScaler

# deployment
import pickle as pkl
from sklearn.pipeline import Pipeline

## silences ALL warnings for the session (not only deprecation warnings)
from warnings import filterwarnings
filterwarnings('ignore')

## Create Connection

In [2]:
# Connection settings come from the environment so that no secret is stored in
# the notebook. Host, port and service keep their previous values as defaults.
# NOTE(review): the password that used to be hard-coded in this cell should be
# treated as compromised and rotated.
ORACLE_HOST = os.environ.get("ORACLE_HOST", "dwh-scan.kapitalbank.az")
ORACLE_PORT = int(os.environ.get("ORACLE_PORT", "1521"))
ORACLE_SERVICE = os.environ.get("ORACLE_SERVICE", "report")

dsn = cx_Oracle.makedsn(ORACLE_HOST, ORACLE_PORT, service_name=ORACLE_SERVICE)

# Fall back to an interactive prompt when the variables are not set; getpass
# keeps the password out of the cell source and the cell output.
oracle_user = os.environ.get("ORACLE_USER") or input("Oracle user: ")
oracle_password = os.environ.get("ORACLE_PASSWORD") or getpass("Oracle password: ")

# NOTE(review): the recorded output contains 'L?MON SUPERMARKET', which looks
# like a character lost in client-side charset conversion. UTF-8 is requested
# explicitly here -- confirm that this resolves it.
connection = cx_Oracle.connect(
    user=oracle_user,
    password=oracle_password,
    dsn=dsn,
    encoding="UTF-8",
    nencoding="UTF-8",
)

In [3]:
# SQL text for the full extract.
# Reads cms.f_cashback_transaction (alias a) and inner-joins it to:
#   - fraud.emp_01_05_2023 (b) on cms_id
#   - CMS.D_MCC_DETAILS    (c) on MCC
#   - dwh.D_CURRENCY       (d) on a.from_cur = d.id
# CASHBACK_RATE, CASHBACK_AMOUNT and BALANCE_AFTER are rounded to one decimal
# place in the database. The statement has no WHERE clause, so every matching
# row is returned.
query_all="""
select /*+parallel */
 a.bank_time,
 a.TERMOWNER,
 round(a.CASHBACK_RATE, 1) CASHBACK_RATE,
 round(a.CASHBACK_AMOUNT, 1) CASHBACK_AMOUNT,
 round(a.BALANCE_AFTER, 1) BALANCE_AFTER,
 a.DIRECTION,
 b.pin,
 b.marstat,
 b.birth_date,
 b.hiredate,
 c.mcc,
 c.mcc_description,
 c.mcc_group,
 d.id,
 d.currency,
 d.name
  from cms.f_cashback_transaction a
 inner join fraud.emp_01_05_2023 b
    on a.cms_id = b.cms_id
 INNER JOIN CMS.D_MCC_DETAILS C
    ON A.MCC = C.MCC
 inner join dwh.D_CURRENCY d
    on a.from_cur = d.id"""

In [4]:
%%time

df_all=pd.read_sql(query_all,con=connection)
df_all.tail()

Wall time: 1min 41s


Unnamed: 0,BANK_TIME,TERMOWNER,CASHBACK_RATE,CASHBACK_AMOUNT,BALANCE_AFTER,DIRECTION,PIN,MARSTAT,BIRTH_DATE,HIREDATE,MCC,MCC_DESCRIPTION,MCC_GROUP,ID,CURRENCY,NAME
2087089,2021-09-16 19:15:03,POS LINS COFFEE S.R,1.5,0.1,0.3,CREDIT,2HQBLXN,Evli,25.06.1967,17.10.2019,5812,"Eating Places, Restaurants",VARIOUS SHOPS,944,AZN,Азербайджанский манат
2087090,2021-09-16 17:43:03,SPECIALTY COFFEE,1.5,0.1,0.3,CREDIT,2HQBLXN,Evli,25.06.1967,17.10.2019,5499,"Misc Food Stores-Speciality,Convenience,",RETAIL STORES,944,AZN,Азербайджанский манат
2087091,2021-09-16 11:49:07,PASHA POS H2H MCD 28 MAY 6,1.5,0.0,0.0,CREDIT,2HQBLXN,Evli,25.06.1967,17.10.2019,5814,Fast Food Resturant,VARIOUS SHOPS,944,AZN,Азербайджанский манат
2087092,2021-09-16 11:51:28,BRAVO MARKET 28 MALL,1.5,0.0,0.1,CREDIT,2HQBLXN,Evli,25.06.1967,17.10.2019,5411,"Grocery Stores, Supermarkets",RETAIL STORES,944,AZN,Азербайджанский манат
2087093,2023-04-26 22:54:29,L?MON SUPERMARKET,1.5,0.2,2.5,CREDIT,2HQBLXN,Evli,25.06.1967,17.10.2019,5411,"Grocery Stores, Supermarkets",RETAIL STORES,944,AZN,Азербайджанский манат


In [5]:
# Preview the first rows of the extract
df_all.head(n=5)

Unnamed: 0,BANK_TIME,TERMOWNER,CASHBACK_RATE,CASHBACK_AMOUNT,BALANCE_AFTER,DIRECTION,PIN,MARSTAT,BIRTH_DATE,HIREDATE,MCC,MCC_DESCRIPTION,MCC_GROUP,ID,CURRENCY,NAME
0,2022-12-19 21:04:23,YDM 27,1.5,0.3,10.9,CREDIT,4M9LANY,Subay,01.09.1989,08.05.2021,5541,Service Stations,AUTOMOBILES AND VEHICLES,944,AZN,Азербайджанский манат
1,2023-04-25 17:26:16,www.birbank.az,0.0,0.0,4.0,CREDIT,4M9LANY,Subay,01.09.1989,08.05.2021,6012,Member Financial Institution-Merchandise,SERVICE PROVIDER,944,AZN,Азербайджанский манат
2,2023-04-25 13:07:29,INSIDE RESTORAN,2.0,0.2,3.9,CREDIT,4M9LANY,Subay,01.09.1989,08.05.2021,5812,"Eating Places, Restaurants",VARIOUS SHOPS,840,USD,Американский доллар
3,2023-02-16 21:50:23,DANIZ MARKET,3.0,0.1,10.3,CREDIT,4M9LANY,Subay,01.09.1989,08.05.2021,5411,"Grocery Stores, Supermarkets",RETAIL STORES,840,USD,Американский доллар
4,2022-12-18 19:12:44,BOLT FOOD,1.5,0.4,10.6,CREDIT,4M9LANY,Subay,01.09.1989,08.05.2021,4215,"Courier Services-Air Or Ground,Freight F",TRANSPORT,944,AZN,Азербайджанский манат


In [6]:
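# Optional CSV export, kept disabled: the Parquet file written in the next cell is roughly 10x smaller.
# df_all.to_csv(r"C:\Users\SamadovIAz\Desktop\Cashback_Analyse\df.csv", sep=',', encoding='utf-8', header=True)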

In [7]:
# Persist the extract as Parquet in the working directory. A relative path keeps
# the notebook portable and matches the location the reload step reads from
# ('df.parquet'), instead of a user-specific absolute Windows path.
# Size comparison from the original run: 396,204 KB csv --> 35,963 KB parquet
df_all.to_parquet("df.parquet")

In [8]:
# Reload the Parquet file from the working directory to verify the export
check = pd.read_parquet(path='df.parquet')

In [9]:
# Display the frame reloaded from Parquet to inspect the round trip
check

Unnamed: 0,BANK_TIME,TERMOWNER,CASHBACK_RATE,CASHBACK_AMOUNT,BALANCE_AFTER,DIRECTION,PIN,MARSTAT,BIRTH_DATE,HIREDATE,MCC,MCC_DESCRIPTION,MCC_GROUP,ID,CURRENCY,NAME
0,2022-12-19 21:04:23,YDM 27,1.5,0.3,10.9,CREDIT,4M9LANY,Subay,01.09.1989,08.05.2021,5541,Service Stations,AUTOMOBILES AND VEHICLES,944,AZN,Азербайджанский манат
1,2023-04-25 17:26:16,www.birbank.az,0.0,0.0,4.0,CREDIT,4M9LANY,Subay,01.09.1989,08.05.2021,6012,Member Financial Institution-Merchandise,SERVICE PROVIDER,944,AZN,Азербайджанский манат
2,2023-04-25 13:07:29,INSIDE RESTORAN,2.0,0.2,3.9,CREDIT,4M9LANY,Subay,01.09.1989,08.05.2021,5812,"Eating Places, Restaurants",VARIOUS SHOPS,840,USD,Американский доллар
3,2023-02-16 21:50:23,DANIZ MARKET,3.0,0.1,10.3,CREDIT,4M9LANY,Subay,01.09.1989,08.05.2021,5411,"Grocery Stores, Supermarkets",RETAIL STORES,840,USD,Американский доллар
4,2022-12-18 19:12:44,BOLT FOOD,1.5,0.4,10.6,CREDIT,4M9LANY,Subay,01.09.1989,08.05.2021,4215,"Courier Services-Air Or Ground,Freight F",TRANSPORT,944,AZN,Азербайджанский манат
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2087089,2021-09-16 19:15:03,POS LINS COFFEE S.R,1.5,0.1,0.3,CREDIT,2HQBLXN,Evli,25.06.1967,17.10.2019,5812,"Eating Places, Restaurants",VARIOUS SHOPS,944,AZN,Азербайджанский манат
2087090,2021-09-16 17:43:03,SPECIALTY COFFEE,1.5,0.1,0.3,CREDIT,2HQBLXN,Evli,25.06.1967,17.10.2019,5499,"Misc Food Stores-Speciality,Convenience,",RETAIL STORES,944,AZN,Азербайджанский манат
2087091,2021-09-16 11:49:07,PASHA POS H2H MCD 28 MAY 6,1.5,0.0,0.0,CREDIT,2HQBLXN,Evli,25.06.1967,17.10.2019,5814,Fast Food Resturant,VARIOUS SHOPS,944,AZN,Азербайджанский манат
2087092,2021-09-16 11:51:28,BRAVO MARKET 28 MALL,1.5,0.0,0.1,CREDIT,2HQBLXN,Evli,25.06.1967,17.10.2019,5411,"Grocery Stores, Supermarkets",RETAIL STORES,944,AZN,Азербайджанский манат
