## Equal treatment in employment cases in Poland 2001-2021

### 1. topic description

In [1]:
id = 4
topicName = 'equal-treatment-pl'
title = """
Equal treatment in employment - court cases in Poland from 2010 to 2021
""".replace('\n',' ').strip()
titlePl = """
Równe traktowanie w zatrudnieniu - sprawy sądowe w Polsce w latach 2010-2021
""".replace('\n',' ').strip()
country = 'Poland'
countryPl = 'Polska'
startYear = 2010
endYear = 2021
description="""
Records of cases for compensation for violation of the 
principles of equal treatment in employment 
in Poland (Article 18 3d of the Labor Code).
Data is collected and published by the Polish
Ministry of Justice (2010-2021). 
""".replace('\n',' ').strip()
descriptionPl="""
Ewidencja spraw o odszkodowanie z tytułu naruszenia 
zasad równego traktowania w zatrudnieniu w Polsce (art. 18 3d Kodeksu Pracy).
Dane opublikowane przez polskie Ministerstwo Sprawiedliwości (2010-2021).
""".replace('\n',' ').strip()
sourceName = "Ministry of Justice (PL)"
sourceNamePl ="Ministerstwo Sprawiedliwości (PL)"
sourceLink="https://isws.ms.gov.pl/pl/baza-statystyczna/opracowania-wieloletnie/"
fileName = topicName
sourceFileExt = 'xlsx'

In [None]:
from src.database.insert_topic import insert_topic

insert_topic(
    id,
    topicName,
    title,
    titlePl,
    country,
    countryPl,
    startYear,
    endYear,
    description,
    descriptionPl,
    sourceName,
    sourceNamePl,
    sourceLink,
    fileName,
    sourceFileExt
)

### 2. Columns names & descriptions

In [3]:
column_names_descriptions = {

    0: ('filed', 
        'court cases filed', 
        'wpłynęło'),
    1: ('granted', 
        'court cases granted', 
        'uwzględniono'),
    2: ('denied', 
        'court cases denied', 
        'oddalono'),
    3: ('returned', 
        'court cases returned', 
        'zwrócono'),
    4: ('rejected', 
        'court cases rejected', 
        'odrzucono'),
    5: ('discontinued', 
        'court cases discontinued', 
        'discontinued'),
    6: ('other', 
        'other', 
        'inne załatwienia'),
    7: ('pending', 
        'pending', 
        'pozostałe na następny okres'),
    8: ('year', 
        'year', 
        'rok'),
    9: ('court', 
        'couth', 
        'sąd'),
    10: ('sex', 
        'sex', 
        'płeć'),
    
}

In [4]:
column_names = [t[0] for t in column_names_descriptions.values()]

In [5]:
import pandas as pd

df = pd.DataFrame(columns=column_names)


df

Unnamed: 0,filed,granted,denied,returned,rejected,discontinued,other,pending,year,court,sex


In [6]:
from pandas import DataFrame


def add_rows(sheet_name: str, data: DataFrame, court: str):
    data.columns = column_names[:8]

    data['year'] = [sheet_name,sheet_name]
    
    data['court'] = [court, court]

    data['sex'] = ['female','male']

    return data 

In [7]:
original_data_path = '../data/raw/equal-treatment-pl/equal-treatment-pl-2010-2021.xlsx'

xls = pd.ExcelFile(original_data_path)

sheet_names = xls.sheet_names

for sheet_name in sheet_names:
    temp_df = pd.read_excel(original_data_path, skiprows=8, skipfooter=7,sheet_name=sheet_name)
    temp_df = temp_df.drop(temp_df.columns[4], axis=1)
    district_courts = temp_df.iloc[0:2,3:11]
    provincial_courts = temp_df.iloc[10:,3:11]
    df = pd.concat([df,add_rows(sheet_name, district_courts, 'district')])
    df = pd.concat([df,add_rows(sheet_name, provincial_courts, 'provincial')])

df = df.reset_index(drop=True)


In [8]:
df.head(10)

Unnamed: 0,filed,granted,denied,returned,rejected,discontinued,other,pending,year,court,sex
0,334.0,19,47.0,8,–,49,65.0,146.0,2011,district,female
1,508.0,18,34.0,23,2,36,167.0,223.0,2011,district,male
2,34.0,2,4.0,1,–,3,3.0,21.0,2011,provincial,female
3,37.0,2,4.0,2,–,2,9.0,17.0,2011,provincial,male
4,385.0,33,55.0,2,2,32,96.0,165.0,2012,district,female
5,487.0,57,38.0,1,2,81,110.0,198.0,2012,district,male
6,31.0,2,3.0,–,–,2,2.0,22.0,2012,provincial,female
7,30.0,–,5.0,1,–,3,1.0,20.0,2012,provincial,male
8,325.0,29,63.0,1,–,30,30.0,172.0,2013,district,female
9,446.0,33,62.0,3,2,64,70.0,212.0,2013,district,male


In [9]:
df = df.replace(['.', '–'], "0")

replace_dash_with_zero = lambda x: x.replace('-', '0') if isinstance(x, str) else x

df = df.applymap(replace_dash_with_zero)

In [10]:
df.isna().any()

filed           False
granted         False
denied          False
returned        False
rejected        False
discontinued    False
other           False
pending         False
year            False
court           False
sex             False
dtype: bool

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44 entries, 0 to 43
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   filed         44 non-null     float64
 1   granted       44 non-null     object 
 2   denied        44 non-null     float64
 3   returned      44 non-null     object 
 4   rejected      44 non-null     object 
 5   discontinued  44 non-null     object 
 6   other         44 non-null     float64
 7   pending       44 non-null     float64
 8   year          44 non-null     object 
 9   court         44 non-null     object 
 10  sex           44 non-null     object 
dtypes: float64(4), object(7)
memory usage: 3.9+ KB


In [12]:

df = df.astype({f"{x}":int for x in df.columns[:9]})

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44 entries, 0 to 43
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   filed         44 non-null     int64 
 1   granted       44 non-null     int64 
 2   denied        44 non-null     int64 
 3   returned      44 non-null     int64 
 4   rejected      44 non-null     int64 
 5   discontinued  44 non-null     int64 
 6   other         44 non-null     int64 
 7   pending       44 non-null     int64 
 8   year          44 non-null     int64 
 9   court         44 non-null     object
 10  sex           44 non-null     object
dtypes: int64(9), object(2)
memory usage: 3.9+ KB


### 3. create db tables

In [13]:
from src.database.connect_db import connect_db

db = connect_db()

cursor = db.cursor()

In [14]:
MAIN_TABLE_NAME = "equalTreatmentPl"

In [15]:
query = f"CREATE TABLE {MAIN_TABLE_NAME} (id int NOT NULL AUTO_INCREMENT PRIMARY KEY, "

for column in df.columns[:9]:
    query += f"{column} INT NOT NULL, "
for column in df.columns[9:]:
    query += f"{column} VARCHAR(255) NOT NULL, "


query = query[:-2] + ")"

In [16]:
try:
    cursor.execute(query)
except:
    print('table already created')


table already created


In [17]:
data = [tuple(row) for index, row in df.iterrows()]

In [18]:
query = f"""
INSERT INTO {MAIN_TABLE_NAME} ({', '.join(df.columns)}) 
VALUES ({', '.join(['%s']*len(df.columns))});
"""

try:
    cursor.executemany(query, data)
except:
    print('data added already')

In [19]:
from pandas import DataFrame


def insertData(ca_df:DataFrame, tableName:str):
    data = [tuple(row) for index, row in ca_df.iterrows()]
    
    query = f"""
    INSERT INTO {tableName} ({', '.join(ca_df.columns)}) 
    VALUES ({', '.join(['%s']*len(ca_df.columns))});
    """

    try:
        cursor.executemany(query, data)
    except:
        print('data added already')
    

In [20]:
cursor.execute(f"""
               SELECT * 
               FROM {MAIN_TABLE_NAME}
               LIMIT 5
               """)
cursor.fetchall()

((1, 334, 19, 47, 8, 0, 49, 65, 146, 2011, 'district', 'female'),
 (2, 508, 18, 34, 23, 2, 36, 167, 223, 2011, 'district', 'male'),
 (3, 34, 2, 4, 1, 0, 3, 3, 21, 2011, 'provincial', 'female'),
 (4, 37, 2, 4, 2, 0, 2, 9, 17, 2011, 'provincial', 'male'),
 (5, 385, 33, 55, 2, 2, 32, 96, 165, 2012, 'district', 'female'))

In [None]:
from src.database.create_ds_desc_table import create_ds_desc_table


create_ds_desc_table(MAIN_TABLE_NAME)

In [22]:
data = [row for row in column_names_descriptions.values()]

In [None]:
from src.database.insert_into_ds_desc import insert_into_ds_desc


insert_into_ds_desc(MAIN_TABLE_NAME, data)

### 4. upload files to GCP storage

In [24]:
df_desc = pd.DataFrame(data, 
    columns=['column_name', 
    'description', 
    'descriptionPl'])

In [25]:
df_path = f'../data/processed/{topicName}/{fileName}.csv'
df_path_desc =f'../data/processed/{topicName}/{fileName}-desc.csv'

In [26]:
df.to_csv(df_path, index=False)
df_desc.to_csv(df_path_desc, index=False)

In [27]:
destination = f"{topicName}/{fileName}.csv"
destination_desc = f"{topicName}/{fileName}-desc.csv"
destination_original = f"{topicName}/{fileName}-source.{sourceFileExt}"

In [28]:
from src.utils.upload_file_gpc import upload_file

upload_file(df_path,destination)
upload_file(df_path,destination_desc)
upload_file(original_data_path,destination_original)

File ../data/processed/equal-treatment-pl/equal-treatment-pl.csv uploaded to legal-charts-datasets/equal-treatment-pl/equal-treatment-pl.csv.
File ../data/processed/equal-treatment-pl/equal-treatment-pl.csv uploaded to legal-charts-datasets/equal-treatment-pl/equal-treatment-pl-desc.csv.
File ../data/raw/equal-treatment-pl/equal-treatment-pl-2010-2021.xlsx uploaded to legal-charts-datasets/equal-treatment-pl/equal-treatment-pl-source.xlsx.
