In [9]:
import re
import time
import datetime

import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

# Helper Functions


In [10]:
def to_lowercase(df):
	df = df.copy()
	df.rename({i:i.lower() for i in df.columns.values}, axis=1, inplace=True)
	return df

def to_snakecase(df):
	snakecase = {i: re.sub(r"[,.;@#?!&$]+\ *", "", i.strip()).replace(" ", "_") for i in df}
	df.rename(columns=snakecase, inplace=True)
	return df


def get_Users(path="../../../data/users.csv", date_format = "%Y-%m"):
	users = to_snakecase(to_lowercase(pd.read_csv(path)))
	users['user'] = users['user'].astype(str).str.pad(width=10, side='left', fillchar='0')
	users['birth_year'] = users['birth_year'].astype(str) + '-' + users['birth_month'].astype(str)
	users['birth_year'] = pd.to_datetime(users['birth_year'], format=date_format)
	users = users.drop(columns= ['birth_month'])
	users = users.rename(columns={'birth_year': 'birth_year_month'})
	return users


def get_CreditCards(path="../../../data/users.csv", date_format = "%m/%Y"):
	credit_cards = to_snakecase(to_lowercase(pd.read_csv("../../../data/credit_cards.csv")))
	credit_cards['user'] = credit_cards['user'].astype(str).str.pad(width=10, side='left', fillchar='0')
	credit_cards['expires'] = pd.to_datetime(credit_cards['expires'], format=date_format)
	credit_cards['acct_open_date'] = pd.to_datetime(credit_cards['expires'], format=date_format)
	credit_cards['year_pin_last_changed'] = pd.to_datetime(credit_cards['year_pin_last_changed'], format="%Y")
	return credit_cards

def get_Transactions(path="../../../data/transactions.csv", date_format = "%m/%Y"):
	transactions = to_snakecase(to_lowercase(pd.read_csv(path)))
	transactions.insert(0, 'identifier', transactions.index + 1) 
	transactions['user'] = transactions['user'].astype(str).str.pad(width=10, side='left', fillchar='0')
	transactions = transactions.rename(columns={'card':'card_index'})
	hour_min = transactions['time'].str.split(":", expand=True).rename(columns={0:'hour', 1:'minute'})
	transactions = pd.concat([transactions, hour_min], axis=1)

	date_cols = ['year', 'month', 'day', 'hour', 'minute']
	transactions['date'] = pd.to_datetime(transactions[date_cols])

	cc_no = get_CreditCards()[['user', 'card_index', 'card_number']]
	card_no = transactions.merge(cc_no, how='inner', on=['user', 'card_index'])['card_number']
	transactions.insert(1, 'card_number', card_no) 
	transactions = transactions.drop(columns= ['card_index', 'time'] + date_cols)
	return transactions

def get_Products(path="../../../data/santander_train_small.csv", 
				column_mapping = {
					"fecha_dato": "report_date",
					"ncodpers": "customer_id",
					"ind_empleado": "employee_index",
					"pais_residencia": "country_residence",
					"sexo": "gender",
					"age": "age",
					"fecha_alta": "contract_start_date",
					"ind_nuevo": "new_customer_index",
					"antiguedad": "seniority_months",
					"indrel": "primary_customer_status",
					"ult_fec_cli_1t": "last_primary_customer_date",
					"indrel_1mes": "customer_type_start_month",
					"tiprel_1mes": "customer_relation_type",
					"indresi": "residence_index",
					"indext": "foreigner_index",
					"conyuemp": "spouse_employee_index",
					"canal_entrada": "join_channel",
					"indfall": "deceased_index",
					"tipodom": "address_type",
					"cod_prov": "province_code",
					"nomprov": "province_name",
					"ind_actividad_cliente": "activity_index",
					"renta": "gross_income",
					"segmento": "customer_segment",
					"ind_ahor_fin_ult1": "saving_account",
					"ind_aval_fin_ult1": "guarantee",
					"ind_cco_fin_ult1": "current_account",
					"ind_cder_fin_ult1": "derivada_account",
					"ind_cno_fin_ult1": "payroll_account",
					"ind_ctju_fin_ult1": "junior_account",
					"ind_ctma_fin_ult1": "more_particular_account",
					"ind_ctop_fin_ult1": "particular_account",
					"ind_ctpp_fin_ult1": "particular_plus_account",
					"ind_deco_fin_ult1": "short_term_deposits",
					"ind_deme_fin_ult1": "medium_term_deposits",
					"ind_dela_fin_ult1": "long_term_deposits",
					"ind_ecue_fin_ult1": "e_account",
					"ind_fond_fin_ult1": "funds",
					"ind_hip_fin_ult1": "mortgage",
					"ind_plan_fin_ult1": "pensions",
					"ind_pres_fin_ult1": "loans",
					"ind_reca_fin_ult1": "taxes",
					"ind_tjcr_fin_ult1": "credit_card",
					"ind_valo_fin_ult1": "securities",
					"ind_viv_fin_ult1": "home_account",
					"ind_nomina_ult1": "payroll",
					"ind_nom_pens_ult1": "pensions_payments",
					"ind_recibo_ult1": "direct_debit"
				}
	):
	santender = pd.read_csv(path)
	santender = santender.rename(columns=column_mapping)
	santender = to_snakecase(to_lowercase(santender))
	santender['report_date'] = pd.to_datetime(santender['report_date'])
	santender['contract_start_date'] = pd.to_datetime(santender['contract_start_date'])
	santender['last_primary_customer_date'] = pd.to_datetime(santender['last_primary_customer_date'])
	str_cols = santender.select_dtypes(include='object').columns
	santender[str_cols] = santender[str_cols].apply(lambda x: x.str.strip(), axis=1)
	santender[str_cols] = santender[str_cols].replace(regex=[r'NA'], value=None)
	return santender

def get_Churn(path="../../../data/churn_modelling.csv"):
	churn = to_snakecase(to_lowercase(pd.read_csv(path)))
	return churn

def get_Engagement(path="../../../data/campaign_engage.csv"):
	engagement = to_snakecase(to_lowercase(pd.read_csv(path)))
	engagement = engagement.drop(columns=['advertisingplatform','advertisingtool'])
	return engagement

# Get Data as pd.DataFrame

In [11]:
users = get_Users()
credit_cards = get_CreditCards()
transactions = get_Transactions()
santender = get_Products()
churn = get_Churn()
engagement = get_Engagement()


# Connect to Database

In [None]:
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship, Mapped, mapped_column
from sqlalchemy import create_engine, Column, Integer, String, Double, DateTime, ForeignKey


def create_db(user="root", password="msql1234", server="localhost", database="transact"):
    SQLALCHEMY_DATABASE_URL = "mysql+pymysql://{}:{}@{}/{}".format(
        user, password, server, database
    )
    engine = create_engine(SQLALCHEMY_DATABASE_URL)

    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    Base = declarative_base()

    return engine, SessionLocal, Base

engine, SessionLocal, Base = create_db()

# Schemas

In [12]:
class Users(Base):
	__tablename__ = 'users'
	user = Column(String(10), primary_key=True, nullable=False)
	person = Column(String(32))
	current_age = Column(Integer)
	retirement_age = Column(Integer)
	birth_year_month = Column(DateTime)
	gender = Column(String(32))
	address = Column(String(64))
	apartment = Column(Integer)
	city = Column(String(32))
	state = Column(String(32))
	zipcode = Column(String(32))
	latitude = Column(Double)
	longitude = Column(Double)
	per_capita_income = Column(Double)
	yearly_income = Column(Double)
	total_debt = Column(Double)
	fico_score = Column(Double)
	num_credit_cards = Column(Integer)


class Cards(Base):
	__tablename__ = 'credit_cards'
	user = Column(String(10), ForeignKey("users.user", ondelete="CASCADE"), nullable=False)
	card_index = Column(Integer)
	card_brand = Column(String(32))
	card_type = Column(String(32))
	card_number = Column(String(16), primary_key=True, nullable=False)
	expires  = Column(DateTime)
	cvv = Column(String(3))
	has_chip = Column(String(3)) 
	cards_issued = Column(Integer)
	credit_limit = Column(Double)
	acct_open_date = Column(DateTime)
	year_pin_last_changed = Column(DateTime)
	card_on_dark_web = Column(String(3))

class Transactions(Base):
	__tablename__ = "transactions"
	identifier = Column(Integer, primary_key=True, autoincrement=True)
	user = Column(String(10), ForeignKey("users.user", ondelete="CASCADE"), nullable=False)
	card_number = Column(String(16), ForeignKey("credit_cards.card_number", ondelete="CASCADE"), nullable=False)
	date = Column(DateTime)
	amount = Column(Double)
	use_chip = Column(String(32))
	merchant_name = Column(String(32))
	merchant_city = Column(String(32))
	merchant_state = Column(String(32))
	zip = Column(String(16))
	mcc = Column(Integer)
	errors = Column(String(32))
	is_fraud = Column(String(3))

class Engagement(Base): # Indepedent
	__tablename__ = "engagement"
	customerid = Column(Integer, primary_key=True, autoincrement=True)
	age = Column(Integer)
	gender = Column(String(16))
	income = Column(Double)
	campaignchannel = Column(String(32))
	campaigntype = Column(String(32))
	adspend = Column(Double)
	clickthroughrate = Column(Double) 
	conversionrate = Column(Integer)
	websitevisits = Column(Integer)
	pagespervisit = Column(Double) 
	timeonsite = Column(Double) 
	socialshares = Column(Integer)
	emailopens = Column(Integer)
	emailclicks = Column(Integer)
	previouspurchases = Column(Integer)
	loyaltypoints = Column(Integer)
	# advertisingplatform = Column(String(32)) 
	# advertisingtool = Column(String(32))
	conversion = Column(Integer)

class Product(Base):
	__tablename__ = 'santender'
	report_date = Column(DateTime)
	customer_id = Column(Integer, primary_key=True, autoincrement=True)
	employee_index =  Column(String(16))
	country_residence = Column(String(32))
	gender =  Column(String(16))
	age = Column(Integer)
	contract_start_date = Column(DateTime)
	new_customer_index = Column(Integer)
	seniority_months = Column(Integer)
	primary_customer_status = Column(Integer)
	last_primary_customer_date = Column(DateTime)
	customer_type_start_month = Column(Integer)
	customer_relation_type = Column(String(16))
	residence_index = Column(String(16))
	foreigner_index = Column(String(16))
	spouse_employee_index = Column(String(16))
	join_channel = Column(String(16))
	deceased_index = Column(String(16))
	address_type = Column(Integer)
	province_code = Column(Integer)
	province_name = Column(String(32))
	activity_index = Column(Integer)
	gross_income = Column(Double)
	customer_segment = Column(String(32))
	saving_account = Column(Integer)
	guarantee = Column(Integer) 
	current_account = Column(Integer)
	derivada_account = Column(Integer)
	payroll_account = Column(Integer)
	junior_account = Column(Integer)
	more_particular_account = Column(Integer)
	particular_account = Column(Integer)
	particular_plus_account = Column(Integer)
	short_term_deposits = Column(Integer)
	medium_term_deposits = Column(Integer)
	long_term_deposits = Column(Integer)
	e_account = Column(Integer)
	funds = Column(Integer)
	mortgage = Column(Integer)
	pensions = Column(Integer)
	loans = Column(Integer)
	taxes = Column(Integer)
	credit_card = Column(Integer)
	securities = Column(Integer)
	home_account = Column(Integer)
	payroll = Column(Integer)
	pensions_payments = Column(Integer)
	direct_debit = Column(Integer)


Base.metadata.create_all(engine)

# Insert into Database

In [13]:
with engine.connect() as db:
	dct = {'users': users, 'credit_cards': credit_cards,
		'transactions': transactions, 'engagement':engagement,
		'santender': santender}
	for k,v in dct.items():
		try:
			v.to_sql(k, con=engine, if_exists='append', index=False)
			db.commit()
		except:
			db.rollback()
			print(k)
	db.close()

# Example on How to get data

In [15]:
import pandas as pd
import numpy as np

import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine


def create_db(user="root", password="msql1234", server="localhost", database="transact"):
    SQLALCHEMY_DATABASE_URL = "mysql+pymysql://{}:{}@{}/{}".format(
        user, password, server, database
    )
    engine = create_engine(SQLALCHEMY_DATABASE_URL)

    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    Base = declarative_base()

    return engine, SessionLocal, Base

engine, SessionLocal, Base = create_db()

In [16]:
with engine.connect() as db:
	query_string = sqlalchemy.text(
		"""SELECT * 
		FROM users u, credit_cards cc
		WHERE u.user = cc.user
		LIMIT 100; """
	)
	fetched = db.execute(query_string).fetchall()
	db.close()

In [17]:
pd.DataFrame(fetched)

Unnamed: 0,user,person,current_age,retirement_age,birth_year_month,gender,address,apartment,city,state,...,card_type,card_number,expires,cvv,has_chip,cards_issued,credit_limit,acct_open_date,year_pin_last_changed,card_on_dark_web
0,0000000000,Hazel Robinson,53,66,1966-11-01,Female,462 Rose Lane,,La Verne,CA,...,Debit,4344676511950444,2022-12-01,623,YES,2,24295.0,2022-12-01,2008-01-01,No
1,0000000000,Hazel Robinson,53,66,1966-11-01,Female,462 Rose Lane,,La Verne,CA,...,Debit,4582313478255491,2024-02-01,719,YES,2,46414.0,2024-02-01,2004-01-01,No
2,0000000000,Hazel Robinson,53,66,1966-11-01,Female,462 Rose Lane,,La Verne,CA,...,Credit,4879494103069057,2024-08-01,693,NO,1,12400.0,2024-08-01,2012-01-01,No
3,0000000000,Hazel Robinson,53,66,1966-11-01,Female,462 Rose Lane,,La Verne,CA,...,Debit,4956965974959986,2020-12-01,393,YES,2,21968.0,2020-12-01,2014-01-01,No
4,0000000000,Hazel Robinson,53,66,1966-11-01,Female,462 Rose Lane,,La Verne,CA,...,Debit (Prepaid),5722874738736011,2009-03-01,75,YES,1,28.0,2009-03-01,2009-01-01,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0000000033,Rosa Faraday,21,66,1998-09-01,Female,93 Plum Lane,,Old Saybrook,CT,...,Debit,5068255294282305,2020-11-01,890,YES,1,25815.0,2020-11-01,2017-01-01,No
96,0000000034,Brynn Allen,47,65,1972-11-01,Female,837 Lincoln Avenue,,Mobile,AL,...,Credit,354813103334804,2021-06-01,527,YES,2,8400.0,2021-06-01,2014-01-01,No
97,0000000034,Brynn Allen,47,65,1972-11-01,Female,837 Lincoln Avenue,,Mobile,AL,...,Debit,5400182448188961,2021-10-01,412,YES,1,24232.0,2021-10-01,2009-01-01,No
98,0000000034,Brynn Allen,47,65,1972-11-01,Female,837 Lincoln Avenue,,Mobile,AL,...,Debit,5910929635230868,2020-02-01,525,YES,1,21628.0,2020-02-01,2008-01-01,No
