In [40]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [41]:
from sqlalchemy import create_engine, Column, String, Integer, DateTime
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

In [42]:
# The connection string is read from the environment so that database
# credentials are never stored in (or committed with) the notebook.
# Expected form: postgresql://<user>:<password>@<host>:5432/<database>
DB_URL = os.environ.get("DATABASE_URL")
if not DB_URL:
    raise RuntimeError(
        "DATABASE_URL is not set; export a SQLAlchemy connection URL such as "
        "postgresql://<user>:<password>@<host>:5432/<database> "
        "before running this notebook."
    )

engine = create_engine(DB_URL)

# Session factory bound to the engine, plus the single session that the
# query cells in this notebook use.
Session = sessionmaker(bind=engine)
session = Session()

In [43]:
# Shared declarative base that the ORM model classes defined below subclass.
# NOTE(review): SQLAlchemy 1.4+ also exposes declarative_base from
# sqlalchemy.orm and deprecates the sqlalchemy.ext.declarative import path —
# confirm against the installed version.
Base = declarative_base()


# Define the User model
class User(Base):
    """ORM mapping for rows of the ``users`` table."""

    __tablename__ = "users"

    # Primary key; the sample rows displayed in this notebook hold
    # UUID-formatted strings.
    id = Column(String, primary_key=True)
    name = Column(String)
    # NOTE(review): whether this holds a hash or plaintext cannot be told from
    # here — confirm before sharing any output that includes it.
    password = Column(String)
    gender = Column(String)
    date_of_birth = Column(DateTime)


# Define the TaxDetails model
class TaxDetails(Base):
    """ORM mapping for rows of the ``tax_details`` table."""

    __tablename__ = "tax_details"

    tax_id = Column(String, primary_key=True)
    # Matched against User.id by the merge performed later in this notebook;
    # no ForeignKey constraint is declared on the column here.
    user_id = Column(String)
    year = Column(Integer)
    # Amounts are mapped as integers; the currency/unit is not stated in this file.
    income = Column(Integer)
    taxable_income = Column(Integer)
    location = Column(String)
    tax_amount = Column(Integer)

In [44]:
# Load every row of the users table as ORM objects, then build a DataFrame
# from each object's attribute dict. vars(obj) is obj.__dict__, so SQLAlchemy's
# `_sa_instance_state` bookkeeping entry becomes a column alongside the data.
users = session.query(User).all()
users_df = pd.DataFrame(list(map(vars, users)))

In [45]:
# Load every row of the tax_details table as ORM objects, then build a
# DataFrame from each object's attribute dict. vars(obj) is obj.__dict__, so
# SQLAlchemy's `_sa_instance_state` entry becomes a column alongside the data.
tax_details = session.query(TaxDetails).all()
tax_details_df = pd.DataFrame(list(map(vars, tax_details)))

In [46]:
# Preview of the raw users frame; it still carries the `_sa_instance_state`
# and `password` columns at this point.
users_df.head()

Unnamed: 0,_sa_instance_state,name,password,date_of_birth,id,gender
0,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1964-01-01,8b5e9e55-b33c-487a-b93a-9907f9a8a789,female
1,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,2000-01-01,4517cb8a-9ad0-4d80-aa42-2b07aab66147,male
2,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1996-01-01,4929a434-03c8-4c8a-bfe8-d514977f34bd,female
3,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1986-01-01,a8730e91-988d-4f67-ae90-bc9b48cf6bff,female
4,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1985-01-01,eeaf8bea-38f5-4a1d-991a-91b782306360,female


In [47]:
# Preview of the raw tax-details frame; it still carries the
# `_sa_instance_state` column at this point.
tax_details_df.head()

Unnamed: 0,_sa_instance_state,tax_id,income,location,taxable_income,user_id,year,tax_amount
0,<sqlalchemy.orm.state.InstanceState object at ...,f60cf86f-f4d0-48cc-b7de-af6f37ef7efc,5766000,chittagong,5366000,eeaf8bea-38f5-4a1d-991a-91b782306360,2015,1211500
1,<sqlalchemy.orm.state.InstanceState object at ...,7f1de94d-b5ac-4643-99c4-7631218981c1,6919200,chittagong,6519200,eeaf8bea-38f5-4a1d-991a-91b782306360,2016,1499800
2,<sqlalchemy.orm.state.InstanceState object at ...,2fb5ef52-b340-468c-af18-1af90396d30b,8303040,chittagong,7903040,eeaf8bea-38f5-4a1d-991a-91b782306360,2017,1845760
3,<sqlalchemy.orm.state.InstanceState object at ...,bb6cd22f-3f5e-4ccd-8ea8-4a79e575b1ae,9963648,chittagong,9563648,eeaf8bea-38f5-4a1d-991a-91b782306360,2018,2260912
4,<sqlalchemy.orm.state.InstanceState object at ...,5b20781e-2a43-4ce8-a9a0-af0cedc04c6e,11956378,chittagong,11556378,eeaf8bea-38f5-4a1d-991a-91b782306360,2019,2759094


In [48]:
# Join users to their tax records on users.id == tax_details.user_id
# (pandas' default inner join). Column names present in both frames are
# disambiguated with the default `_x` / `_y` suffixes.
df = users_df.merge(tax_details_df, left_on="id", right_on="user_id")

In [49]:
# Display the merged frame.
# NOTE(review): a bare `df` renders the whole (truncated) frame, including the
# `password` column — consider `df.head()` and clearing the output before sharing.
df

Unnamed: 0,_sa_instance_state_x,name,password,date_of_birth,id,gender,_sa_instance_state_y,tax_id,income,location,taxable_income,user_id,year,tax_amount
0,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1985-01-01,eeaf8bea-38f5-4a1d-991a-91b782306360,female,<sqlalchemy.orm.state.InstanceState object at ...,f60cf86f-f4d0-48cc-b7de-af6f37ef7efc,5766000,chittagong,5366000,eeaf8bea-38f5-4a1d-991a-91b782306360,2015,1211500
1,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1985-01-01,eeaf8bea-38f5-4a1d-991a-91b782306360,female,<sqlalchemy.orm.state.InstanceState object at ...,7f1de94d-b5ac-4643-99c4-7631218981c1,6919200,chittagong,6519200,eeaf8bea-38f5-4a1d-991a-91b782306360,2016,1499800
2,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1985-01-01,eeaf8bea-38f5-4a1d-991a-91b782306360,female,<sqlalchemy.orm.state.InstanceState object at ...,2fb5ef52-b340-468c-af18-1af90396d30b,8303040,chittagong,7903040,eeaf8bea-38f5-4a1d-991a-91b782306360,2017,1845760
3,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1985-01-01,eeaf8bea-38f5-4a1d-991a-91b782306360,female,<sqlalchemy.orm.state.InstanceState object at ...,bb6cd22f-3f5e-4ccd-8ea8-4a79e575b1ae,9963648,chittagong,9563648,eeaf8bea-38f5-4a1d-991a-91b782306360,2018,2260912
4,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1985-01-01,eeaf8bea-38f5-4a1d-991a-91b782306360,female,<sqlalchemy.orm.state.InstanceState object at ...,5b20781e-2a43-4ce8-a9a0-af0cedc04c6e,11956378,chittagong,11556378,eeaf8bea-38f5-4a1d-991a-91b782306360,2019,2759094
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14002,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1962-01-01,3043f8f1-119f-4a0b-bfd7-bab005328d0a,female,<sqlalchemy.orm.state.InstanceState object at ...,0e5c0281-ba20-48b2-931b-440625d780a5,4684386,dhaka,4284386,3043f8f1-119f-4a0b-bfd7-bab005328d0a,2017,941096
14003,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1962-01-01,3043f8f1-119f-4a0b-bfd7-bab005328d0a,female,<sqlalchemy.orm.state.InstanceState object at ...,f8ca1b63-0c9a-48f0-8d1f-9f5bec2cbdee,5501071,dhaka,5101071,3043f8f1-119f-4a0b-bfd7-bab005328d0a,2018,1145267
14004,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1962-01-01,3043f8f1-119f-4a0b-bfd7-bab005328d0a,female,<sqlalchemy.orm.state.InstanceState object at ...,4f34631d-839c-433a-93e2-0c25f6d557f7,5688858,dhaka,5288858,3043f8f1-119f-4a0b-bfd7-bab005328d0a,2019,1192214
14005,<sqlalchemy.orm.state.InstanceState object at ...,User Name,Password,1962-01-01,3043f8f1-119f-4a0b-bfd7-bab005328d0a,female,<sqlalchemy.orm.state.InstanceState object at ...,2ea60cdf-115d-407a-b169-8d5600a05cba,6026708,dhaka,5626708,3043f8f1-119f-4a0b-bfd7-bab005328d0a,2020,1276677


In [50]:
# Columns kept for analysis. Identifiers (id, user_id, tax_id), personal fields
# (name, password, date_of_birth) and the SQLAlchemy `_sa_instance_state_*`
# bookkeeping columns are left out.
ANALYSIS_COLUMNS = [
    "gender",
    "income",
    "location",
    "taxable_income",
    "year",
    "tax_amount",
]

# Select the wanted columns instead of dropping the others in place, so that
# re-running this cell gives the same result rather than raising KeyError on
# columns that are already gone. `.copy()` detaches the result from the merged
# frame so later assignments do not trigger chained-assignment warnings.
df = df[ANALYSIS_COLUMNS].copy()

In [51]:
# Confirm that only the six analysis columns remain.
df.head()

Unnamed: 0,gender,income,location,taxable_income,year,tax_amount
0,female,5766000,chittagong,5366000,2015,1211500
1,female,6919200,chittagong,6519200,2016,1499800
2,female,8303040,chittagong,7903040,2017,1845760
3,female,9963648,chittagong,9563648,2018,2260912
4,female,11956378,chittagong,11556378,2019,2759094


In [52]:
# Persist the analysis frame as CSV in the working directory; the row index
# is not written.
OUTPUT_CSV = "data.csv"
df.to_csv(OUTPUT_CSV, index=False)