In [4]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import csv
import warnings
import glob

In [5]:
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides pandas warnings raised by the in-place fill helpers below.
warnings.simplefilter("ignore")

In [6]:
def plot_columns(df):
    """Draw one line plot per column of ``df``, each in its own figure.

    The x axis is ``df.index`` and the y axis is the column's values. Every
    figure is titled with the column name and all figures are shown together
    by a single ``plt.show()`` call after the loop.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are plotted one by one.
    """
    palette = plt.cm.tab20

    for i, column in enumerate(df.columns):
        # tab20 holds palette.N (20) distinct colours; cycle through them so
        # that columns past the 20th do not all collapse onto the last colour.
        color = palette(i % palette.N)

        plt.figure()
        plt.plot(df.index, df[column], color=color)
        plt.xlabel('Ano')
        plt.ylabel('Valor')
        plt.title(column)

    plt.show()

In [7]:
def centered_moving_average(column, window_action, window_size):
    """Smooth part of a series with a centered moving average.

    For every position ``i`` in the inclusive range given by ``window_action``
    whose value is not NaN, the value is replaced by the mean of the non-NaN
    values found in positions ``i - window_size`` .. ``i + window_size``
    (clipped to the bounds of the series). NaN positions are left as NaN.

    Every window is read from the *original* values, so the result does not
    depend on the order in which positions are processed, and the input
    ``column`` is never modified.

    Parameters
    ----------
    column : pandas.Series
        Numeric series to smooth.
    window_action : tuple[int, int]
        ``(start_index, end_index)`` positions, both inclusive, to smooth.
    window_size : int
        Number of neighbours taken on each side of the centre position.

    Returns
    -------
    pandas.Series
        New float series with the same name as ``column`` and a default
        integer index.
    """
    start_index, end_index = window_action

    # Float copies: `source` stays untouched for reading, `smoothed` is written.
    # Copying keeps the caller's data intact and avoids truncating means when
    # the input has an integer dtype.
    source = column.to_numpy(dtype=float, copy=True)
    smoothed = source.copy()
    data_length = len(source)

    for i in range(start_index, end_index + 1):
        if np.isnan(source[i]):
            continue
        start = max(0, i - window_size)
        end = min(data_length, i + window_size + 1)
        window = source[start:end]
        # The centre value is valid, so at least one element survives the mask.
        smoothed[i] = window[~np.isnan(window)].mean()

    return pd.Series(smoothed, name=column.name)

In [8]:
# Linear interpolation helpers: fill the ends of a series, then interpolate the interior gaps
def ends_filler(column):
    """Fill a missing first and last value of an integer or float series.

    Series whose dtype kind is neither ``'i'`` nor ``'f'`` are returned
    untouched. Otherwise the series is passed through ``first_filler`` and
    then ``last_filler`` and the result is returned.
    """
    is_numeric = column.dtype.kind in ('i', 'f')
    if not is_numeric:
        return column
    return last_filler(first_filler(column))

def first_filler(column):
    """Make sure the first element of ``column`` is not missing.

    When the first element is null it is overwritten, in place, with the first
    non-null value found further down the series, or with ``0.0`` when every
    element is null. All other elements are left untouched.

    Access is positional (``iloc``), so the series may carry any index. An
    empty series is returned unchanged.

    Parameters
    ----------
    column : pandas.Series
        Series to fix; it is modified in place.

    Returns
    -------
    pandas.Series
        The same ``column`` object.
    """
    if len(column) == 0:
        return column

    if pd.isnull(column.iloc[0]):
        valid_positions = np.flatnonzero(column.notnull().to_numpy())
        if valid_positions.size == 0:
            # Nothing to copy from: fall back to zero.
            column.iloc[0] = 0.0
        else:
            column.iloc[0] = column.iloc[valid_positions[0]]
    return column
    
def last_filler(column):
    """Make sure the last element of ``column`` is not missing.

    When the last element is null it is overwritten, in place, with the last
    non-null value found earlier in the series, or with ``0.0`` when every
    element is null. All other elements are left untouched.

    Access is positional (``iloc``), so the series may carry any index. An
    empty series is returned unchanged.

    Parameters
    ----------
    column : pandas.Series
        Series to fix; it is modified in place.

    Returns
    -------
    pandas.Series
        The same ``column`` object.
    """
    if len(column) == 0:
        return column

    if pd.isnull(column.iloc[-1]):
        valid_positions = np.flatnonzero(column.notnull().to_numpy())
        if valid_positions.size == 0:
            # Nothing to copy from: fall back to zero.
            column.iloc[-1] = 0.0
        else:
            column.iloc[-1] = column.iloc[valid_positions[-1]]
    return column
    
def prev_valid_index(column, pos):
    """Return the closest label at or before ``pos`` holding a non-missing value.

    The backwards search only runs when ``pos`` lies strictly between the
    first and the last position; otherwise ``pos`` is returned as given.
    When no valid value is found the result is ``-1``.
    Labels are looked up with ``column.at``.
    """
    last_position = len(column) - 1
    if not (0 < pos < last_position):
        return pos

    cursor = pos
    while cursor >= 0:
        value = column.at[cursor]
        if not (pd.isnull(value) or np.isnan(value)):
            break
        cursor -= 1
    return cursor

def next_valid_index(column, pos):
    """Return the closest label at or after ``pos`` holding a non-missing value.

    The forward search only runs when ``pos`` lies strictly between the
    first and the last position; otherwise ``pos`` is returned as given.
    When no valid value is found the result is ``len(column)``.
    Labels are looked up with ``column.at``.
    """
    last_position = len(column) - 1
    if not (0 < pos < last_position):
        return pos

    cursor = pos
    while cursor <= last_position:
        value = column.at[cursor]
        if not (pd.isnull(value) or np.isnan(value)):
            break
        cursor += 1
    return cursor

def linear_filler(column):
    """Fill interior runs of missing values by linear interpolation.

    Each run of consecutive nulls that has a valid value on both sides is
    replaced, in place, by points on the straight line joining those two
    neighbours (positions are treated as equally spaced). Runs that touch the
    first or last position have no neighbour on one side and are left as they
    are instead of raising.

    Access is positional, so the series may carry any index.

    Parameters
    ----------
    column : pandas.Series
        Numeric series to fill; it is modified in place.

    Returns
    -------
    pandas.Series
        The same ``column`` object.
    """
    missing = column.isnull().to_numpy()
    if not missing.any():
        return column

    size = len(column)
    pos = 0
    while pos < size:
        if not missing[pos]:
            pos += 1
            continue

        # Walk to the end of the current run of missing values.
        gap_start = pos
        while pos < size and missing[pos]:
            pos += 1
        gap_end = pos  # exclusive: first valid position after the run, or size

        left, right = gap_start - 1, gap_end
        if left < 0 or right >= size:
            # Edge run: interpolation is undefined without both neighbours.
            continue

        left_value = column.iloc[left]
        right_value = column.iloc[right]
        linear_a = (right_value - left_value) / (right - left)
        linear_b = left_value - linear_a * left
        for x in range(gap_start, gap_end):
            column.iloc[x] = linear_a * x + linear_b
    return column

In [9]:
def read_csv_replace_missing_by_linear(file_path, skiped_rows):
    """Read a CSV file and fill the missing values of its data columns.

    The first column is left exactly as read. Every other column is processed
    as follows:

    1. the markers ``'-9999'``, ``'-'`` and ``''`` are turned into NaN;
    2. the column is cast to float;
    3. a numeric ``-9999`` (present when pandas already parsed the column as
       numbers) is turned into NaN as well;
    4. the ends are filled with ``ends_filler`` and the interior gaps with
       ``linear_filler``.

    Parameters
    ----------
    file_path : str
        Path of the CSV file, forwarded to ``pandas.read_csv``.
    skiped_rows : int or list-like
        Forwarded to ``pandas.read_csv`` as ``skiprows``.

    Returns
    -------
    pandas.DataFrame
        The frame with its data columns converted to float and gap-filled.

    Raises
    ------
    ValueError
        If a data column still holds a non-numeric value after the markers
        have been removed (raised by ``astype(float)``).
    """
    # pandas opens and closes the file itself; no separate handle is needed.
    df = pd.read_csv(file_path, skiprows=skiped_rows)

    # NOTE(review): the first column is assumed to be a label/date column - confirm.
    for col in df.columns[1:]:
        values = df[col].replace(['-9999', '-', ''], np.nan).astype(float)
        # The sentinel survives the string replacement when the column was
        # parsed as numeric, so drop its numeric form too.
        values = values.replace(-9999.0, np.nan)
        values = ends_filler(values)
        df[col] = linear_filler(values)
        # Optional smoothing step, currently disabled (24-sample window, i.e. 2 years):
        # df[col] = centered_moving_average(df[col], (0, len(df[col]) - 1), 24)
    return df

In [10]:
# First look at the file exactly as pandas parses it, without any missing-value handling.
df = pd.read_csv(filepath_or_buffer="consumo_cimento_mensal.csv")
df

Unnamed: 0,Ano,Rondônia - Consumo de Cimento (t),Acre - Consumo de Cimento (t),Amazonas - Consumo de Cimento (t),Roraima - Consumo de Cimento (t),Pará - Consumo de Cimento (t),Amapá - Consumo de Cimento (t),Totins-01 - Consumo de Cimento (t),Mahão -01- Consumo de Cimento (t),Piauí - Consumo de Cimento (t),...,Espírito to --01 Consumo de Cimento (t),Rio De eiro-01 - Consumo de Cimento (t),São Paulo - Consumo de Cimento (t),Paá - -01Consumo de Cimento (t),ta C-01atarina - Consumo de Cimento (t),Rio Gde D-01o Sul - Consumo de Cimento (t),Mato Grosso - Consumo de Cimento (t),Mato Grosso Do Sul - Consumo de Cimento (t),Goiás - Consumo de Cimento (t),Distrito Federal - Consumo de Cimento (t)
0,2003-01,13.867,3.260,28.193,6.920,84.701,10.392,17.555,43.342,21.844,...,48.426,254.362,782.111,189.804,142.456,188.093,47.470,33.744,97.447,50.047
1,2003-02,10.777,2.191,31.226,7.476,80.221,6.857,14.976,28.663,15.814,...,54.969,252.842,789.891,174.713,135.829,170.973,45.387,32.230,97.256,48.110
2,2003-03,13.888,2.154,33.484,8.659,76.122,7.011,13.334,27.900,16.067,...,53.847,229.385,774.309,180.801,133.135,174.880,44.907,33.559,101.442,49.006
3,2003-04,11.859,2.643,31.596,7.259,76.544,7.122,15.173,25.739,14.938,...,50.375,243.407,782.071,180.172,144.723,178.111,45.467,34.283,98.217,48.445
4,2003-05,14.431,2.546,32.919,6.786,83.445,5.267,18.728,30.203,17.765,...,52.576,256.535,841.400,185.597,150.523,184.331,56.246,36.066,112.540,51.436
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2022-08,49.066,21.748,66.077,9.133,156.442,13.208,54.154,102.698,77.776,...,100.170,241.702,1112.914,412.177,377.545,288.217,192.533,103.854,274.673,77.448
236,2022-09,49.850,20.032,58.590,10.622,145.777,13.476,55.664,98.133,72.377,...,85.832,221.036,1028.144,337.056,370.282,272.178,183.895,95.890,247.240,64.321
237,2022-10,43.733,18.573,58.793,10.300,139.233,11.236,52.320,94.964,71.791,...,91.318,224.399,996.449,340.018,340.900,272.665,180.525,89.679,246.647,72.839
238,2022-11,40.078,15.493,53.509,10.258,142.025,13.549,43.036,97.684,71.015,...,77.187,221.319,1024.272,383.477,363.900,275.972,161.683,99.356,225.813,59.598


In [11]:
# Reload the same file through the cleaning helper (no rows skipped);
# this replaces the raw frame bound to `df` by the previous cell.
df = read_csv_replace_missing_by_linear(
    file_path="consumo_cimento_mensal.csv",
    skiped_rows=0,
)
df

Unnamed: 0,Ano,Rondônia - Consumo de Cimento (t),Acre - Consumo de Cimento (t),Amazonas - Consumo de Cimento (t),Roraima - Consumo de Cimento (t),Pará - Consumo de Cimento (t),Amapá - Consumo de Cimento (t),Totins-01 - Consumo de Cimento (t),Mahão -01- Consumo de Cimento (t),Piauí - Consumo de Cimento (t),...,Espírito to --01 Consumo de Cimento (t),Rio De eiro-01 - Consumo de Cimento (t),São Paulo - Consumo de Cimento (t),Paá - -01Consumo de Cimento (t),ta C-01atarina - Consumo de Cimento (t),Rio Gde D-01o Sul - Consumo de Cimento (t),Mato Grosso - Consumo de Cimento (t),Mato Grosso Do Sul - Consumo de Cimento (t),Goiás - Consumo de Cimento (t),Distrito Federal - Consumo de Cimento (t)
0,2003-01,13.867,3.260,28.193,6.920,84.701,10.392,17.555,43.342,21.844,...,48.426,254.362,782.111,189.804,142.456,188.093,47.470,33.744,97.447,50.047
1,2003-02,10.777,2.191,31.226,7.476,80.221,6.857,14.976,28.663,15.814,...,54.969,252.842,789.891,174.713,135.829,170.973,45.387,32.230,97.256,48.110
2,2003-03,13.888,2.154,33.484,8.659,76.122,7.011,13.334,27.900,16.067,...,53.847,229.385,774.309,180.801,133.135,174.880,44.907,33.559,101.442,49.006
3,2003-04,11.859,2.643,31.596,7.259,76.544,7.122,15.173,25.739,14.938,...,50.375,243.407,782.071,180.172,144.723,178.111,45.467,34.283,98.217,48.445
4,2003-05,14.431,2.546,32.919,6.786,83.445,5.267,18.728,30.203,17.765,...,52.576,256.535,841.400,185.597,150.523,184.331,56.246,36.066,112.540,51.436
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2022-08,49.066,21.748,66.077,9.133,156.442,13.208,54.154,102.698,77.776,...,100.170,241.702,1112.914,412.177,377.545,288.217,192.533,103.854,274.673,77.448
236,2022-09,49.850,20.032,58.590,10.622,145.777,13.476,55.664,98.133,72.377,...,85.832,221.036,1028.144,337.056,370.282,272.178,183.895,95.890,247.240,64.321
237,2022-10,43.733,18.573,58.793,10.300,139.233,11.236,52.320,94.964,71.791,...,91.318,224.399,996.449,340.018,340.900,272.665,180.525,89.679,246.647,72.839
238,2022-11,40.078,15.493,53.509,10.258,142.025,13.549,43.036,97.684,71.015,...,77.187,221.319,1024.272,383.477,363.900,275.972,161.683,99.356,225.813,59.598


In [12]:
# Yearly totals: keep only the first four characters of 'Ano' (the year part
# of values such as '2003-01') and sum every column within each year.
df_year = (
    df.assign(Ano=df['Ano'].str[:4])
      .groupby('Ano')
      .sum()
)
df_year

Unnamed: 0_level_0,Rondônia - Consumo de Cimento (t),Acre - Consumo de Cimento (t),Amazonas - Consumo de Cimento (t),Roraima - Consumo de Cimento (t),Pará - Consumo de Cimento (t),Amapá - Consumo de Cimento (t),Totins-01 - Consumo de Cimento (t),Mahão -01- Consumo de Cimento (t),Piauí - Consumo de Cimento (t),Ceará - Consumo de Cimento (t),...,Espírito to --01 Consumo de Cimento (t),Rio De eiro-01 - Consumo de Cimento (t),São Paulo - Consumo de Cimento (t),Paá - -01Consumo de Cimento (t),ta C-01atarina - Consumo de Cimento (t),Rio Gde D-01o Sul - Consumo de Cimento (t),Mato Grosso - Consumo de Cimento (t),Mato Grosso Do Sul - Consumo de Cimento (t),Goiás - Consumo de Cimento (t),Distrito Federal - Consumo de Cimento (t)
Ano,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2003,185.386,43.603,418.347,74.283,1052.31,85.093,239.493,470.726,238.886,888.572,...,635.153,3021.269,9528.051,2158.499,1687.172,2103.0,668.575,417.592,1311.935,635.841
2004,209.004,58.142,587.382,39.775,1193.163,80.008,397.426,531.005,276.711,917.163,...,636.659,3092.53,9148.318,2259.91,1740.105,2116.716,716.682,466.165,1514.939,771.216
2005,233.057,80.914,613.286,30.179,1264.982,104.197,431.203,579.458,272.764,986.162,...,768.967,3324.329,9724.722,2225.422,1824.65,1966.059,737.634,479.481,1716.9,855.531
2006,272.001,114.415,626.147,34.243,1181.617,115.54,312.507,693.939,333.178,1055.446,...,673.561,3665.682,10863.79,2203.646,1870.478,2106.926,701.548,515.485,1730.142,747.184
2007,351.377,121.872,666.931,35.931,1272.803,135.046,426.737,816.847,386.585,1173.764,...,974.754,4107.097,11775.453,2382.72,2081.839,2280.943,840.901,632.929,1943.769,808.342
2008,390.788,119.145,769.233,60.009,1559.491,116.366,450.745,963.205,449.076,1328.591,...,1263.713,4300.676,13476.268,3001.521,2653.587,2718.542,994.957,814.771,2310.616,909.713
2009,452.325,121.146,744.252,63.064,1355.544,112.729,467.588,1123.586,536.766,1437.485,...,1138.824,3966.93,13304.301,3092.068,2669.4,2685.74,998.465,693.327,2335.717,991.263
2010,921.425,174.705,796.116,86.408,1640.682,151.542,487.208,1378.741,639.535,1794.24,...,1223.466,4199.911,14534.98,3749.448,2852.759,3070.312,1099.34,813.149,2657.527,1167.604
2011,1021.57,178.322,948.011,103.009,1856.176,161.97,458.331,1282.016,625.022,1717.74,...,1271.961,4390.254,14871.943,4174.942,3087.81,3399.177,1204.074,923.248,2901.707,1277.89
2012,933.501,173.494,994.932,100.066,2042.91,261.12,508.683,1528.77,687.536,1903.741,...,1354.74,4733.632,15159.683,4522.969,3268.31,3487.789,1381.539,1008.624,3061.533,1261.667


In [13]:
# The year labels are strings after the groupby; convert them to integers
# so they can be compared numerically, then keep 2014 onwards.
df_year.index = df_year.index.astype(int)
df_2014 = df_year.loc[df_year.index >= 2014]
df_2014

Unnamed: 0_level_0,Rondônia - Consumo de Cimento (t),Acre - Consumo de Cimento (t),Amazonas - Consumo de Cimento (t),Roraima - Consumo de Cimento (t),Pará - Consumo de Cimento (t),Amapá - Consumo de Cimento (t),Totins-01 - Consumo de Cimento (t),Mahão -01- Consumo de Cimento (t),Piauí - Consumo de Cimento (t),Ceará - Consumo de Cimento (t),...,Espírito to --01 Consumo de Cimento (t),Rio De eiro-01 - Consumo de Cimento (t),São Paulo - Consumo de Cimento (t),Paá - -01Consumo de Cimento (t),ta C-01atarina - Consumo de Cimento (t),Rio Gde D-01o Sul - Consumo de Cimento (t),Mato Grosso - Consumo de Cimento (t),Mato Grosso Do Sul - Consumo de Cimento (t),Goiás - Consumo de Cimento (t),Distrito Federal - Consumo de Cimento (t)
Ano,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,614.394714,171.709286,771.567857,120.417571,2041.730714,227.243,426.767286,1303.411571,755.713857,1964.413143,...,1296.041857,4454.468,13714.511286,4341.821857,3246.749286,3226.786714,1411.234429,995.222143,2861.897429,979.998857
2015,510.273771,151.486629,746.867486,113.282057,1863.549771,198.0696,454.929429,1191.509657,794.945486,2037.703714,...,1250.323886,3639.072,11767.653429,4108.786286,3017.084229,2978.617371,1241.100343,951.041314,2517.290743,811.194686
2016,410.580514,134.223086,739.457657,105.481371,1705.680514,166.8504,489.016286,1100.217771,842.687657,2146.547143,...,1210.917257,2811.648,9811.084286,3869.334857,2776.217486,2744.168914,1083.128229,904.537543,2193.545829,638.818457
2017,406.814,150.491,687.211,99.723,1564.94,135.021,500.27,971.921,831.356,2035.158,...,1097.369,2284.918,9918.501,3764.518,2891.489,2703.746,1220.058,870.341,2270.346,584.646
2018,376.138,148.696,350.177,83.012,1375.281,123.347,396.438,750.012,626.223,1758.9,...,1146.39,2391.212,10001.077,3773.354,2976.485,2665.575,1316.386,869.68,2282.903,571.13
2019,399.441,153.806,533.444,121.12,1290.928,120.153,402.932,774.045,702.437,1701.23,...,1139.906,2715.843,10458.276,3786.974,3258.027,2753.564,1446.064,902.144,2367.716,637.028
2020,488.194,178.033,595.32,151.571,1444.547,133.523,500.126,1038.249,788.164,1864.364,...,1138.655,2935.607,11181.809,4235.532,3537.032,2993.25,1555.266,1038.045,2438.585,662.625
2021,502.824,189.357,610.155,144.989,1529.052,157.621,542.03,1059.129,826.368,2127.104,...,1135.022,2829.682,12113.838,4784.413,4089.333,3222.295,1788.146,1092.056,2782.135,789.947
2022,501.142,202.321,640.274,132.217,1592.624,142.739,554.321,1083.999,804.417,1966.646,...,1040.26,2682.995,12383.649,4538.256,4255.808,3135.029,1947.017,1152.06,2807.973,786.865


In [14]:
# Write one CSV per column of df_2014; each file keeps the index
# next to that single column and is named after the column.
for column in df_2014.columns:
    column_df = df_2014.loc[:, [column]]
    file_name = f"2014_01_cons_cim_{column}.csv"
    column_df.to_csv(file_name, index=True)