In [None]:
# Topic metadata. These values are passed positionally to insert_topic() in
# the next cell and are reused later to build file paths and upload targets.
# NOTE: `id` shadows the built-in id(); the name is kept because later cells
# refer to it.
id = 3
topicName = 'class-actions-pl'

# Multi-line literals are collapsed with ' '.join(text.split()): every run of
# whitespace (line breaks, trailing spaces) becomes exactly one space. The
# previous replace-newline-then-strip approach left double spaces wherever a
# line of the literal ended with a space.
title = ' '.join("""
Class actions in Poland 2010-2021
""".split())
titlePl = ' '.join("""
Pozwy zbiorowe w Polsce 2010-2021
""".split())
country = 'Poland'
countryPl = 'Polska'
startYear = 2010
endYear = 2021
description = ' '.join("""
Class action lawsuits in civil, commercial and labor
law cases in which claims were brought by
a larger number of employees (2010-2021).
Data is collected and published by the Polish
Ministry of Justice.
""".split())
descriptionPl = ' '.join("""
Pozwy zbiorowe w sprawach cywilnych,
gospodarczych i sprawy z zakresu prawa pracy,
w których z pozwem wystąpiła większa grupa pracowników (2010-2021) - Polska.
Dane opublikowane przez polskie Ministerstwo Sprawiedliwości.
""".split())
sourceName = "Ministry of Justice (PL)"
sourceNamePl = "Ministerstwo Sprawiedliwości (PL)"
sourceLink = "https://isws.ms.gov.pl/pl/baza-statystyczna/opracowania-wieloletnie/"
# Base name shared by the processed CSV files and the upload destinations.
fileName = "class-actions-pl"
# Extension given to the uploaded copy of the original source file.
sourceFileExt = 'xlsx'

In [None]:
from src.database.insert_topic import insert_topic

# Hand the topic metadata defined above to insert_topic. All arguments are
# positional, so their order must stay in step with insert_topic's signature.
insert_topic(
    id, topicName,
    title, titlePl,
    country, countryPl,
    startYear, endYear,
    description, descriptionPl,
    sourceName, sourceNamePl, sourceLink,
    fileName, sourceFileExt,
)

In [None]:
import pandas as pd

# Location of the raw workbook; the same file is uploaded unchanged by the
# last cell of the notebook.
original_data_path = '../data/raw/class-actions-pl/pozwy-zbiorowe-2010-2021.xlsx'

# The first 6 rows of the sheet are skipped (presumably title/header rows;
# confirm against the workbook).
df = pd.read_excel(original_data_path, skiprows=6)

df.head()

In [None]:
# Cells holding the placeholder markers '.' or '–' are set to 0 so the slices
# taken below can be cast to int.
df = df.replace(to_replace=['.', '–'], value=0)

In [None]:
# Column position -> (column name, English description, Polish description).
# The first tuple element becomes the DataFrame / table column name; the full
# tuples are reused for the column-description table and CSV.
column_names_descriptions = dict(enumerate([
    ('year', 'year', 'rok'),
    ('filed', 'court cases filed', 'wpłynęło'),
    ('settled', 'court cases settled', 'załatwione'),
    ('rejected', 'court cases rejected', 'odrzucone'),
    ('denied', 'court cases denied', 'oddalono'),
    ('returned', 'court cases returned', 'zwrócono'),
    ('pending', 'court cases pending', 'oczekujące na rozstrzygnięcie'),
]))

In [None]:
# Column names only (first element of each description tuple), in column order.
new_column_names = [name for name, _desc_en, _desc_pl in column_names_descriptions.values()]

In [None]:
# First 12 rows of the sheet, all columns (presumably one row per year
# 2010-2021 for the "C" case type; confirm against the workbook).
df_c = df[:12]

df_c.columns = new_column_names

# The NaN check used to be a bare expression whose result was discarded
# mid-cell. Asserting makes missing values stop the run with a clear message
# instead of an opaque cast error on the next statement.
assert not df_c.isna().any().any(), 'df_c contains missing values'

df_c = df_c.astype(int)

df_c

In [None]:
# Rows 18-29 of the sheet, all columns (presumably one row per year 2010-2021
# for the "Gc" case type; confirm against the workbook).
df_gc = df[18:30]

df_gc.columns = new_column_names

# The NaN check used to be a bare expression whose result was discarded
# mid-cell. Asserting makes missing values stop the run with a clear message
# instead of an opaque cast error on the next statement.
assert not df_gc.isna().any().any(), 'df_gc contains missing values'

df_gc = df_gc.astype(int)

df_gc

In [None]:
# Rows 37-48, columns 2-4 of the sheet hold the "Pr" block. Only three
# columns are taken, so only the first three column names are applied.
df_pr = df.iloc[37:49, 2:5].astype(int)
df_pr.columns = new_column_names[:3]

df_pr

In [None]:
# Rows 50-61, columns 2-4 of the sheet hold the "Po" block. Only three
# columns are taken, so only the first three column names are applied.
df_po = df.iloc[50:62, 2:5].astype(int)
df_po.columns = new_column_names[:3]

df_po

In [None]:
# Common prefix of every table created for this dataset.
dsName = 'classActionsPl'

# Table-name suffix of each case type -> its cleaned frame.
case_types = dict(C=df_c, Gc=df_gc, Pr=df_pr, Po=df_po)

# Resulting table names, e.g. 'classActionsPlC' (a set, so display order is
# not guaranteed).
ds_names = set(dsName + suffix for suffix in case_types)

ds_names

In [None]:
from src.database.connect_db import connect_db

# Open the project database connection and a cursor. Both are notebook-level
# names; createTable, insertData and the preview query below use `cursor`.
# NOTE(review): neither is closed, and no commit is issued, anywhere in the
# visible notebook. Confirm whether connect_db enables autocommit.
db = connect_db()

cursor = db.cursor()

In [None]:
from pandas import DataFrame


def createTable(ca_df:DataFrame, tableName:str):
    """Create a table for one case-type frame using the notebook-level ``cursor``.

    The table gets a ``year INT PRIMARY KEY`` column plus one ``INT NOT NULL``
    column for every column of ``ca_df`` after the first.

    Args:
        ca_df: frame whose column labels (from the second one on) become the
            table's column names.
        tableName: name of the table to create. It is interpolated into the
            SQL text as-is, so it must come from trusted code, not user input.

    Returns:
        None. Database errors are reported on stdout instead of being raised,
        so re-running the notebook against an existing table does not abort.
    """
    column_defs = ["year INT PRIMARY KEY"]
    column_defs.extend(f"{column} INT NOT NULL" for column in ca_df.columns[1:])
    query = f"CREATE TABLE {tableName} ({', '.join(column_defs)})"

    try:
        cursor.execute(query)
    except Exception as error:
        # Only Exception is caught: a bare except also swallowed
        # KeyboardInterrupt/SystemExit. The real error is printed instead of
        # assuming that every failure means the table already exists.
        print(f'table {tableName} not created: {error}')
    

In [None]:
# One table per case type, named <dsName><suffix> (e.g. classActionsPlC).
for key, value in case_types.items():
    createTable(value, dsName + key)


In [None]:
from pandas import DataFrame


def insertData(ca_df:DataFrame, tableName:str):
    """Insert every row of ``ca_df`` into ``tableName`` via the notebook-level ``cursor``.

    Args:
        ca_df: frame to insert. Its column labels are used verbatim as the
            target column names, so they must be strings matching the table.
        tableName: destination table. It is interpolated into the SQL text
            as-is, so it must come from trusted code, not user input.

    Returns:
        None. Database errors are reported on stdout instead of being raised,
        so re-running the notebook after the data was inserted does not abort.
    """
    # itertuples keeps each column's own dtype, whereas iterrows builds one
    # Series per row and may upcast mixed int/float rows.
    data = list(ca_df.itertuples(index=False, name=None))

    # Row values are passed as %s parameters; only identifiers are formatted
    # into the statement text.
    query = f"""
    INSERT INTO {tableName} ({', '.join(ca_df.columns)}) 
    VALUES ({', '.join(['%s']*len(ca_df.columns))});
    """

    try:
        cursor.executemany(query, data)
    except Exception as error:
        # Only Exception is caught: a bare except also swallowed
        # KeyboardInterrupt/SystemExit. The real error is printed instead of
        # assuming that every failure means the rows were inserted before.
        print(f'data not inserted into {tableName}: {error}')
    # NOTE(review): no commit is issued here. Confirm that the connection
    # autocommits, otherwise the inserted rows may not be persisted.
    

In [None]:
# Fill each case-type table (<dsName><suffix>) with the rows of its frame.
for key, value in case_types.items():
    insertData(value, dsName + key)

In [None]:
# Spot check: read back up to five rows of the "C" table. The statement has
# no placeholders, so a plain string literal is used.
cursor.execute("""
               SELECT * 
               FROM classActionsPlC
               LIMIT 5
               """)
cursor.fetchall()

In [None]:
from src.database.create_ds_desc_table import create_ds_desc_table


# Call the project helper with the dataset name prefix. The helper is not
# visible here; presumably it creates the table that holds the column
# descriptions inserted two cells below (confirm).
create_ds_desc_table(dsName)

In [None]:
# (column name, English description, Polish description) tuples in column order.
data = list(column_names_descriptions.values())

In [None]:
from src.database.insert_into_ds_desc import insert_into_ds_desc


# Pass the (column name, description, Polish description) tuples to the
# project helper. The helper is not visible here; presumably it stores them
# in the description table of this dataset (confirm).
insert_into_ds_desc(dsName, data)

In [None]:
# Column descriptions as a frame, so they can be exported to CSV below.
df_desc = pd.DataFrame(
    data,
    columns=['column_name', 'description', 'descriptionPl'],
)

In [None]:
# Output path of the column-description CSV inside the topic's processed folder.
df_path_desc = f'../data/processed/{topicName}/{fileName}-desc.csv'

In [None]:
# Write one CSV per case type, named <fileName>-<suffix>.csv.
import os

processed_dir = f'../data/processed/{topicName}'
# Create the output folder first so the export also works on a fresh checkout
# where ../data/processed/<topicName>/ does not exist yet.
os.makedirs(processed_dir, exist_ok=True)

for key, value in case_types.items():
    value.to_csv(f'{processed_dir}/{fileName}-{key}.csv', index=False)

In [None]:
# Save the column descriptions to df_path_desc without the index column.
df_desc.to_csv(df_path_desc, index=False)

In [None]:
# Destination names passed to upload_file in the last cell.
# NOTE(review): `destination` receives the zip archive but, unlike the two
# names derived from it, carries no file extension. Confirm this is intended.
destination = f"{topicName}/{fileName}"
destination_desc = destination + "-desc.csv"
destination_original = f"{destination}-source.{sourceFileExt}"

In [None]:
from src.utils.upload_file_gpc import upload_file
from src.utils.zip_folder import zip_folder

# The archive is written next to (not inside) the folder being zipped.
output_path = f'../data/processed/{topicName}.zip'

# The folder is derived from topicName (it used to be hard-coded as
# 'class-actions-pl') so it cannot drift from the other paths built from
# topicName when the notebook is adapted for another topic.
zip_folder(f'../data/processed/{topicName}/', output_path)

# Upload the archive, the column-description CSV and the original workbook.
upload_file(output_path, destination)
upload_file(df_path_desc, destination_desc)
upload_file(original_data_path, destination_original)