In [1]:
import config.config as cf
import sqlite3
import pandas as pd
import numpy as np
import os
import sys

# Lift pandas' display limits so that displayed frames are never truncated:
# None means "no limit" for both the column count and the row count.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [2]:
# Relative path of the CSV export that this notebook later loads with pd.read_csv.
file_path = '../data/tv/tv_live_1.csv'

In [3]:
# Brand spellings that are replaced during loading (key -> replacement).
# Brands without an entry here keep their original value.
names = {
    'NEONET.PL': 'NEONET AGD RTV',
    'EURO.COM.PL': 'EURO RTV AGD',
}

# Daypart labels that are replaced during loading (key -> replacement).
# NOTE(review): the name `dayparts` is rebound to an array of unique values further
# down the notebook, so this cell must be re-run before the loading cell is re-run.
dayparts = {
    'Off stacji': 'off',
    'Prime stacji': 'prime',
}

In [4]:
def get_index_val(table_name: str, cur: sqlite3.Cursor)-> int:
    """
    Return the next free id of a table: the current ``MAX(id)`` increased by one.
    When the table holds no rows, returns 1.

    :param table_name: Name of the table to inspect. It is interpolated directly into
    the SQL text, so it must be a trusted identifier and never user input
    :param cur: Cursor object used to run the query
    :raise sqlite3.ProgrammingError: If an error is raised by the DB-API
    :raise sqlite3.OperationalError: If an exception is raised on the DB side,
    e.g. the DB being locked
    :return: Number representing the max id of the selected table increased by 1
    :rtype: int
    """

    cur.execute(f'SELECT MAX(id) FROM {table_name};')
    max_id = cur.fetchone()[0]
    # MAX() over an empty table yields NULL, which sqlite3 hands back as None.
    # Identity comparison is the idiomatic (and unambiguous) test for None.
    return 1 if max_id is None else max_id + 1
    
def iter_over_inputs(data_set: list[dict], con: sqlite3.Connection,
                        cur: sqlite3.Cursor)-> None:
    """
    Main loop: runs the check-then-insert routine for every entry of ``data_set``.

    Each entry is passed to check_for_data_1_field; only when that call reports new
    data are the values it returned handed to add_1_field.

    :param data_set: List of dicts, one per target. Every dict must provide the keys
    'data' (values to upload), 'table' (target table name), 'field' (column name),
    'type' (kind of insert target) and 'dtype' (type the values are converted to)
    :param con: Is a connection object, pointing to a DB
    :param cur: Is a cursor object created for con object
    :raise KeyError: If an entry lacks one of the required keys
    :raise sqlite3.ProgrammingError: If an error is raised by the DB-API
    :raise sqlite3.OperationalError: If an exception is raised on the DB side,
    e.g. the DB being locked
    :return: None
    """

    for entry in data_set:
        # Read all keys up front so a malformed entry fails before any DB access.
        values = entry['data']
        table = entry['table']
        field = entry['field']
        table_type = entry['type']
        data_type = entry['dtype']
        has_new, to_insert = check_for_data_1_field(values, table, field, table_type,
                                                    data_type, cur)
        if has_new:
            add_1_field(to_insert, table, field, table_type, data_type, con, cur)

def add_1_field(data:list, table_name:str, field_name:str, type_:str, dtype:str,
                con: sqlite3.Connection, cur: sqlite3.Cursor)-> None:
    """
    Insert every value of ``data`` into a single column and commit once at the end.

    The insert target is ``table_name`` itself when ``type_`` is 'table'. For any other
    ``type_`` the rows go to 'podziel_brandy_view' when ``table_name`` is 'brandy' and
    to 'podziel_kanaly_view' in all remaining cases.

    :param data: Iterable of values to insert, one row per value
    :param table_name: String representing name of the table the data belongs to
    :param field_name: String representing name of the column that receives the values
    :param type_: String representing kind of target; 'table' inserts into the table
    directly, anything else inserts into one of the two views
    :param dtype: String naming the target type; values are converted with int() when
    it starts with 'int', with str() otherwise
    :param con: Is a connection object, pointing to a DB
    :param cur: Is a cursor object created for con object
    :raise sqlite3.ProgrammingError: If an error is raised by the DB-API
    :raise sqlite3.OperationalError: If an exception is raised on the DB side,
    e.g. the DB being locked
    :return: None
    """

    # Resolve where the rows go; the statement text is the same for all targets.
    if type_ == 'table':
        target = table_name
    elif table_name == 'brandy':
        target = 'podziel_brandy_view'
    else:
        target = 'podziel_kanaly_view'
    statement = f'INSERT INTO {target} ({field_name}) VALUES(:name);'

    for value in data:
        converted = int(value) if dtype.startswith('int') else str(value)
        cur.execute(statement, {'name': converted})
    # A single commit after the loop makes all rows of this call visible together.
    con.commit()

def check_for_data_1_field(data_:list|pd.DataFrame, table_name:str, field_name:str, type_:str, dtype:str,
                            cur: sqlite3.Cursor,)-> tuple[bool,list[str|int]]:
    """
    Skeleton function for checking if there is data inside each of one column tables.
    Ads data if there are any new entries, skips if no new data was found. 
    If DB is empty returns immediately.

    :param data_: List containing data to be checked and added. Data is of str or int types.
    :param table_name: String representing name of the table into which data is going to be added
    :param field_name: String representing name of the field/ column name
    :param type_: String representing type of target where to add the data. Available table or view
    :param dtype: String represzenting to what type data should be converted before upload
    :param cur: Is a cursor object created for con object
    :param avoid_adding: List of tablet which doesn't need to be updated
    :raise sqlite3.ProgrammingError: If there is an error raised by the DB-API
    :daise sqlite3.OperationalError: If any eceptions on the DB side are raised, i.g. DB being locked
    :return: A tuple containing bool for logic purposes, anbd the data set to be added
    :rtype: tuple[bool, list[str|int]]
    """
    
    if table_name != 'kody_rek':
        query = (f"SELECT {field_name} FROM {table_name};")
    else:
        query = (f"SELECT kod_rek FROM {table_name};")
    cur.execute(query)
    
    in_db = pd.DataFrame([elem[0] for elem in cur.fetchall()])
    in_db.rename(columns={0: field_name}, inplace=True)
    if table_name == 'kody_rek':
        in_db = in_db.astype('int32') # new
    else:
        in_db = in_db.astype(dtype) # new
    
    if len(in_db) == 0 and table_name in ('kody_rek', 'brandy', 'kanaly'):
        data_ = data_.iloc[:, -1]
        return (True, data_)
    elif len(in_db) == 0:
        return (True, data_)

    if table_name not in ('kody_rek', 'brandy', 'kanaly'):
        data_ = pd.DataFrame(data_)
        data_ = data_.rename(columns={0: field_name})
        data_ = data_.astype({field_name: dtype})
    else: # new
        cols = data_.columns.to_list() # new
        if table_name == 'kody_rek':
            data_ = data_.astype({cols[0]: 'int32'})
        else:
            data_ = data_.astype({cols[0]: dtype}) # new
        
    # we check if df contains new data in comparison to DB
    if table_name in ('kody_rek', 'brandy', 'kanaly'):
        filtr = ~data_[cols[0]].isin(in_db[field_name])
        new_data = data_[filtr].dropna()
        new_data = new_data.rename({cols[-1]: field_name}, axis=1)
        new_data = new_data[field_name]
    else:
        filtr = ~data_[field_name].isin(in_db[field_name])
        new_data = data_[filtr].dropna()
        new_data = new_data[field_name]
        
    new_data = list(new_data)
    
    
    if len(new_data) != 0:
        print(f'>>> Adding to {table_name}. New data found.')
        return (True, new_data)
    else:
        print(f'>>> Not adding to {table_name}. No new data found.')
        return (False, list(''))

In [5]:
# Open the connection to the SQLite database located at cf.DB_PATH and create the
# cursor shared by the cells below. sqlite3.connect() is the documented factory for
# Connection objects, so it is used instead of instantiating the class directly.
print('Oppening connection.')
con = sqlite3.connect(cf.DB_PATH)
cur = con.cursor()

Oppening connection.


In [6]:
# Load the raw TV export. Only 'Date' is parsed as a date (day.month.year). 'Time' is
# read as plain text on purpose: it is concatenated with the date string below, and the
# combined value is what gets parsed into 'DateTime'. (Listing 'Time' in parse_dates with
# a date-only format only worked because that parse failed and left the text untouched.)
df_tv = pd.read_csv(file_path, delimiter=';', thousands=' ', decimal=',', dtype={
                    'Month': 'category', 'Week': 'category', 'Weekday': 'category',
                    'Dayparts': 'object', 'Channel Groups': 'object',
                    'Channel': 'object', 'PIB pos': 'int8', 'PIB (real) rel': 'object',
                    'PIB count': 'int16', 'Dur rounded,sp': 'int8', 'Spot Class': 'object',
                    'Block Code': 'object', 'Syndicate': 'object', 'Producer': 'object',
                    'Brand': 'object', 'Film Code': 'object', 'Prog Campaign': 'object',
                    'Prog Before': 'object', 'Prog After': 'object',
                    'Film Code/2': 'object', 'Time': 'object'},
                    parse_dates=['Date'], date_format='%d.%m.%Y'
                    )

# Normalise the text columns: trim and upper-case brands, upper-case producer/syndicate
df_tv['Brand'] = df_tv['Brand'].str.strip().str.upper()
df_tv['Producer'] = df_tv['Producer'].str.upper()
df_tv['Syndicate'] = df_tv['Syndicate'].str.upper()

# Replace known spellings; values without a mapping keep their original text
df_tv['Brand'] = df_tv['Brand'].map(names).fillna(df_tv['Brand'])
df_tv['Dayparts'] = df_tv['Dayparts'].map(dayparts).fillna(df_tv['Dayparts'])

# Helper columns: combined timestamp text and the '@|@' / '#|#' separated composites
df_tv['DateTime'] = df_tv['Date'].astype('str') + ' ' + df_tv['Time']
df_tv['Kod Opis'] = df_tv['Film Code/2'] + '@|@' + df_tv['Film Code']
df_tv['Kanal Grupa'] = df_tv['Channel'] + '@|@' + df_tv['Channel Groups']
df_tv['Brand Prod Synd'] = df_tv['Brand'] + '@|@' + df_tv['Producer'] + '#|#' + df_tv['Syndicate']
df_tv['DateTime'] = pd.to_datetime(df_tv['DateTime'], format='ISO8601')

# Missing dayparts / neighbouring programmes get an explicit placeholder value
df_tv[['Dayparts', 'Prog Before', 'Prog After']] = df_tv[['Dayparts', 'Prog Before', 'Prog After']].fillna('brak danych', axis=1)

# Order by date; reset_index() keeps the pre-sort row labels as an 'index' column
df_tv = df_tv.sort_values(by='Date', axis=0).reset_index()

In [7]:
# Distinct values of every reference column found in the loaded export.
dates = df_tv['Date'].dt.strftime('%Y-%m-%d').unique()

# Two-column frames: the first column is the de-duplication key, the second one carries
# the composite text built during loading. Only the first row per key is kept.
ad_codes = df_tv[['Film Code/2', 'Kod Opis']].drop_duplicates(
    subset=['Film Code/2'], keep='first', ignore_index=True)
brands = df_tv[['Brand', 'Brand Prod Synd']].drop_duplicates(
    subset=['Brand'], keep='first', ignore_index=True)
channels = df_tv[['Channel', 'Kanal Grupa']].drop_duplicates(
    subset=['Channel'], keep='first', ignore_index=True)

# NOTE(review): this rebinds `dayparts`, which the loading cell uses as a mapping dict;
# re-running the loading cell afterwards requires re-running the mapping cell first.
dayparts = df_tv['Dayparts'].unique()
pib_real_rels = df_tv['PIB (real) rel'].unique()
durations = df_tv['Dur rounded,sp'].unique()
spot_classes = df_tv['Spot Class'].unique()
block_codes = df_tv['Block Code'].unique()
prog_campaign = df_tv['Prog Campaign'].unique()
prog_before = df_tv['Prog Before'].unique()
prog_after = df_tv['Prog After'].unique()

# One dict per target: the values under 'data' plus the settings taken from the config
# module (the insert routine reads the keys 'table', 'field', 'type' and 'dtype').
data_set = [
    {'data': values, **settings}
    for values, settings in (
        (dates, cf.DATES),
        (dayparts, cf.DAYPARTS),
        (pib_real_rels, cf.PIB_R),
        (durations, cf.DUR),
        (spot_classes, cf.SPOT_CLASS),
        (block_codes, cf.BLOCK_CODE),
        (prog_campaign, cf.PR_CAMP),
        (prog_before, cf.PR_BEF),
        (prog_after, cf.PR_AFT),
        (ad_codes, cf.AD_CODE),
        (brands, cf.BRANDS),
        (channels, cf.CHANNELS),
    )
]



In [8]:
# Upload all reference data sets. The three handled sqlite3 errors are treated the same
# way: close the connection, report the error and call exit() so that no later code runs
# against the closed connection. (Previously the ProgrammingError branch closed the
# connection but carried on.)
try:
    iter_over_inputs(data_set, con, cur)
except (sqlite3.ProgrammingError, sqlite3.OperationalError, sqlite3.IntegrityError) as e:
    con.close()
    print('Failed to input the data.')
    print(f'Error: {e}')
    exit()
    


>>> Adding to data_czas. New data found.
>>> Adding to dayparty. New data found.
>>> Not adding to pib_real_rels. No new data found.
>>> Adding to dlugosci. New data found.
>>> Not adding to klasy_spotu. No new data found.
>>> Not adding to kody_bloku. No new data found.
>>> Adding to prog_kampanie. New data found.
>>> Adding to programy_przed. New data found.
>>> Adding to programy_po. New data found.
>>> Adding to kody_rek. New data found.
>>> Adding to brandy. New data found.
>>> Adding to kanaly. New data found.
