In [5]:
# Import necessary packages
import pandas as pd

# Load the data
# The file name is asked interactively, so this cell blocks until the user answers.
input_csv_file = input('Enter the name of the csv file : ')
df = pd.read_csv(input_csv_file)
# Replace spaces in the column names with underscores (e.g. 'Invoice ID' -> 'Invoice_ID')
df.columns = df.columns.str.replace(' ', '_')

In [6]:
# Preview the first rows to check that the file loaded and the columns were renamed
df.head()

Unnamed: 0,Invoice_ID,Branch,City,Customer_type,Gender,Product_line,Unit_price,Quantity,Tax_5%,Total,Date,Time,Payment,cogs,gross_margin_percentage,gross_income,Rating
0,750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.1415,548.9715,1/5/2019,13:08,Ewallet,522.83,4.761905,26.1415,9.1
1,226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.82,80.22,3/8/2019,10:29,Cash,76.4,4.761905,3.82,9.6
2,631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.2155,340.5255,3/3/2019,13:23,Credit card,324.31,4.761905,16.2155,7.4
3,123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.288,489.048,1/27/2019,20:33,Ewallet,465.76,4.761905,23.288,8.4
4,373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.2085,634.3785,2/8/2019,10:37,Ewallet,604.17,4.761905,30.2085,5.3


In [7]:
def is_not_number(value):
    """Tell whether `value` is unusable as a finite number.

    Parameters
    ----------
    value : object
        Typically the raw string returned by `input()`, but any object is accepted.

    Returns
    -------
    bool
        True when `float(value)` fails or yields NaN / +-infinity,
        False when it yields a finite number.
    """
    import math  # local import so the cell does not depend on another cell's imports

    try:
        float_value = float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None and other non-convertible objects,
        # OverflowError covers integers too large for a float.
        return True

    # 'nan' and 'inf' parse successfully but cannot be turned into a row count later on
    return not math.isfinite(float_value)

In [8]:
# Shuffle all rows once; the fixed seed makes the order reproducible
shuffled_df = df.sample(frac=1, random_state=42).reset_index(drop=True)
total_rows = shuffled_df.shape[0]

# JSON share of the rows (falls back to 30 when the answer is not a number)
json_percent = input('Enter the JSON percentage : ')
if is_not_number(json_percent):
    json_percent = 30
else:
    json_percent = float(json_percent)
line_json = int((total_rows * json_percent) / 100)

# CSV share of the rows (falls back to 40 when the answer is not a number)
csv_percent = input('Enter the CSV percentage : ')
if is_not_number(csv_percent):
    csv_percent = 40
else:
    csv_percent = float(csv_percent)
line_csv = int((total_rows * csv_percent) / 100)

# Database share of the rows (falls back to 30 when the answer is not a number)
db_percent = input('Enter the database percentage : ')
if is_not_number(db_percent):
    db_percent = 30
else:
    db_percent = float(db_percent)
line_db = int((total_rows * db_percent) / 100)

### JSON

In [9]:
# Take the first `line_json` shuffled rows for the JSON export.
# The split is positional (.iloc), so it does not depend on which labels the
# index of shuffled_df currently carries.
json_df = shuffled_df.iloc[:line_json].reset_index(drop=True)

# Keep only the rows that were not exported. The original index labels are
# preserved, and .copy() detaches the remainder from the full frame so later
# in-place operations on it act on an independent object.
# Note: this cell is not idempotent - re-running it without re-running the
# shuffle cell takes the next `line_json` rows.
shuffled_df = shuffled_df.iloc[line_json:].copy()

# Create json file (one JSON object per row)
json_df.to_json('json_file.json', orient='records', date_format='iso')

In [10]:
# Rows and columns still left in shuffled_df after the JSON rows were removed
shuffled_df.shape

(700, 17)

In [11]:
# Rows and columns that went into the JSON export
json_df.shape

(300, 17)

### CSV

In [12]:
# The next `line_csv` remaining rows form the CSV share
csv_df = shuffled_df.iloc[:line_csv]

# Remove those rows from the pool of remaining rows
shuffled_df.drop(index=csv_df.index, inplace=True)

# Write the CSV share without index and without a header line
csv_export = csv_df.reset_index(drop=True)
csv_export.to_csv('csv_file.csv', index=False, header=False)

In [13]:
# Rows and columns still left in shuffled_df after the CSV rows were removed
shuffled_df.shape

(300, 17)

In [14]:
# Rows and columns that went into the CSV export
csv_df.shape

(400, 17)

### Database

In [15]:
# The next `line_db` remaining rows form the database share
db_df = shuffled_df.iloc[:line_db]

# Remove the database rows from the pool of remaining rows
shuffled_df.drop(index=db_df.index, inplace=True)

# Renumber the database rows from 0
db_df = db_df.reset_index(drop=True)

In [16]:
# Import SQL Alchemy
import os
from urllib.parse import quote

from sqlalchemy import create_engine, Column, Integer, String, Text, Boolean, Float, Date, text, inspect, UniqueConstraint, Time
from sqlalchemy.orm import sessionmaker, declarative_base

# Construct the connection string
# driver = 'ODBC+Driver+18+for+SQL+Server'
driver = 'ODBC+Driver+17+for+SQL+Server'
# Raw string: the backslash between host and instance name must stay literal
# ('\S' is an invalid escape sequence in a normal string literal).
server_address = r'LIKE-YOUCODE-DA\SQLEXPRESS'
database_name = 'TalendIntegration'
# Credentials can be supplied through the environment so real secrets do not
# have to be written in the notebook; the previous values remain the fallback.
username = os.environ.get('MSSQL_USERNAME', 'SA')
password = os.environ.get('MSSQL_PASSWORD', 'YourPassword123')

# Connection string for the `database_name` database.
# Username and password are percent-encoded so characters such as '@', ':' or
# '/' cannot break the URL.
# NOTE(review): Trusted_Connection=yes normally selects Windows authentication,
# in which case the username/password are presumably ignored - confirm which
# authentication mode is intended.
master_connection_string = (
    f'mssql+pyodbc://{quote(username, safe="")}:{quote(password, safe="")}'
    f'@{server_address}/{database_name}?driver={driver}&Trusted_Connection=yes'
)

# Engine used by the following cells to create the table and insert the rows
master_engine = create_engine(master_connection_string)

In [17]:
# Create class name
def class_name(filename):
    """Turn a CSV file name into a CamelCase identifier.

    Every '.csv' occurrence is removed, underscores and hyphens become word
    separators, each word is title-cased and the words are joined without
    spaces, e.g. 'sales_data.csv' -> 'SalesData'.
    """
    words = str.replace(filename, '.csv', '')
    for separator in ('_', '-'):
        words = str.replace(words, separator, ' ')

    return str.title(words).replace(' ', '')

In [19]:
# Column names of the database share; the table-creation cell iterates over these
db_df.columns

Index(['Invoice_ID', 'Branch', 'City', 'Customer_type', 'Gender',
       'Product_line', 'Unit_price', 'Quantity', 'Tax_5%', 'Total', 'Date',
       'Time', 'Payment', 'cogs', 'gross_margin_percentage', 'gross_income',
       'Rating'],
      dtype='object')

In [None]:
# invoice_id = Column(String(20), nullable=False)
    # branch = Column(String(5), nullable=False)
    # city = Column(String(40), nullable=False)
    # customer_type = Column(String(20), nullable=False)
    # gender = Column(String(20), nullable=False)
    # product_line = Column(String(255), nullable=False)
    # unit_price = Column(Float, nullable=False)
    # quantity = Column(Integer, nullable=False)
    # tax_percent = Column(Float, nullable=False)
    # total = Column(Float, nullable=False)
    # date = Column(Date, nullable=False)
    # time = Column(Time, nullable=False)
    # payment = Column(String(20), nullable=False)
    # cogs = Column(Float, nullable=False)
    # gross_margin_percentage = Column(Float, nullable=False)
    # gross_income = Column(Float, nullable=False)
    # rating = Column(Float, nullable=False)

In [20]:
# Table creation
import numpy as np
Base = declarative_base()

# Define a dictionary to map DataFrame data types to SQLAlchemy column types
# Keys are the Python/NumPy types of individual cell values; a value type that
# is not listed here has no mapping.
data_type_mapping = {
    str: String,
    np.float64: Float,
    np.int64: Integer
}

# ORM model for the 'sells' table. Only the auto-incremented primary key is
# declared in the class body; the data columns are attached by the loop below.
class Sells(Base):
    __tablename__ ='sells'

    # Column creation
    id = Column(Integer, primary_key=True, autoincrement=True)
    
# Attach one column per DataFrame column, choosing the SQL type from the type
# of the value stored under index label 0 of that column.
for column_name in db_df.columns:
    column_data_type = data_type_mapping.get(type(db_df[column_name][0]))
    
    # Columns whose value type is not in data_type_mapping are skipped
    if column_data_type:
        column_definition = Column(column_data_type, nullable=False)
        setattr(Sells, column_name, column_definition)

# Emit the table creation for everything registered on Base through master_engine
Base.metadata.create_all(master_engine)

In [21]:
# List the table names visible through master_engine to check the table exists
inspector = inspect(master_engine)
inspector.get_table_names()

['sells']

In [22]:
# Create a session
Session = sessionmaker(bind=master_engine)
session = Session()

try:
    # Only DataFrame columns that exist as attributes on the model are copied.
    # This is the same for every row, so it is computed once.
    mapped_columns = [
        column_name for column_name in db_df.columns
        if hasattr(Sells, column_name)
    ]

    new_entries = []
    for _, row in db_df.iterrows():
        new_entry = Sells()

        # Populate the instance with data from the DataFrame row
        for column_name in mapped_columns:
            setattr(new_entry, column_name, row[column_name])

        new_entries.append(new_entry)

    # Insert all rows in one transaction: either every row is stored or none,
    # and the database is hit with a single commit instead of one per row.
    session.add_all(new_entries)
    session.commit()
except Exception:
    # Undo the partial transaction before propagating the error
    session.rollback()
    raise
finally:
    # Close the session even when an insert fails
    session.close()