In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import csv
import warnings
import glob

In [2]:
warnings.filterwarnings("ignore")

In [3]:
def plot_columns(df):
    colors = plt.cm.tab20(range(df.shape[1]))  

    for i, column in enumerate(df.columns):
        color = colors[i % 24]

        plt.figure()
        plt.plot(df.index, df[column], color=color)
        plt.xlabel('Ano')
        plt.ylabel('Valor')
        plt.title(column)

    plt.show()

In [4]:
def centered_moving_average(column, window_action, window_size):
    start_index, end_index = window_action
    data = column.to_numpy()

    data_length = len(data)

    for i in range(start_index, end_index + 1):
        if not np.isnan(data[i]):
            init = data[i]
            start = max(0, i - window_size)
            end = min(data_length, i + window_size + 1)
            window = data[start:end]
            valid_values = [value for value in window if not pd.isna(value)]
            if len(valid_values) > 0:
                data[i] = np.mean(valid_values)

    modified_column = pd.Series(data, name=column.name)

    return modified_column

In [5]:
# Linear Interpolation 
def ends_filler(column):
    if  column.dtype.kind in ['i', 'f']:
            column = first_filler(column)
            column = last_filler(column)
    return column

def first_filler(column):
    if pd.isnull(column.at[0]) or np.isnan(column.at[0]):
        size = len(column) -1
        first_valid_index = 0
        while first_valid_index <= size and (pd.isnull(column.at[first_valid_index]) or np.isnan(column.at[first_valid_index])):
            first_valid_index += 1
        if first_valid_index > size:
            column.at[0] = 0.0
        else:
            column.at[0] = column.at[first_valid_index]
    return column
    
def last_filler(column):
    if pd.isnull(column.at[len(column) -1]) or np.isnan(column.at[len(column) -1]):  
        last_valid_index = len(column) -1
        while last_valid_index >= 0 and (pd.isnull(column.at[last_valid_index]) or np.isnan(column.at[last_valid_index])):
            last_valid_index -= 1
        if last_valid_index < 0:
            column.at[len(column) -1] = 0.0
        else:
            column.at[len(column) -1] = column.at[last_valid_index]
    return column
    
def prev_valid_index(column, pos):
    prev_index = pos
    size = len(column) -1
    if pos > 0 and pos < size:
        while prev_index >= 0 and (pd.isnull(column.at[prev_index]) or np.isnan(column.at[prev_index])):
            prev_index -= 1
    return prev_index  

def next_valid_index(column, pos):
    next_index = pos
    size = len(column) -1
    if pos > 0 and pos < size:
        while next_index <= size and (pd.isnull(column.at[next_index]) or np.isnan(column.at[next_index])):
            next_index += 1
    return next_index

def linear_filler(column):
    if column.isnull().any():
        for i, value in enumerate(column):
            if i > 0 and (pd.isnull(value) or np.isnan(value) or (value == '')):
                start = prev_valid_index(column, i)
                end = next_valid_index(column, i)
                gap = list(range(start + 1, end))
                linear_a = (column.at[end] - column.at[start])/(end - start)
                linear_b = column.at[start] - linear_a*start
                for x in gap:
                    column.at[x] = linear_a*x + linear_b
    return column 

In [6]:
def read_csv_replace_missing_by_linear(file_path, skiped_rows):
    with open(file_path, 'r') as f:
        
        df = pd.read_csv(file_path, skiprows=skiped_rows)
        
        i = 0
        for col in df.columns: # Elimina missing values por médias e transforma em float
            if i > 0:
                df[col] = df[col].replace('-9999', np.nan)
                df[col] = df[col].replace('-', np.nan)
                df[col] = df[col].replace('', np.nan).astype(float)
                df[col] = ends_filler(df[col])
                df[col] = linear_filler(df[col])
                df[col] = centered_moving_average(df[col], (0, len(df[col]) - 1), 24) #2 anos
            i += 1
        return df

In [7]:
df = pd.read_csv("consumo_cimento_mensal.csv")
df

Unnamed: 0,Ano,Rondônia - Consumo de Cimento (t),Acre - Consumo de Cimento (t),Amazonas - Consumo de Cimento (t),Roraima - Consumo de Cimento (t),Pará - Consumo de Cimento (t),Amapá - Consumo de Cimento (t),Totins-01 - Consumo de Cimento (t),Mahão -01- Consumo de Cimento (t),Piauí - Consumo de Cimento (t),...,Espírito to --01 Consumo de Cimento (t),Rio De eiro-01 - Consumo de Cimento (t),São Paulo - Consumo de Cimento (t),Paá - -01Consumo de Cimento (t),ta C-01atarina - Consumo de Cimento (t),Rio Gde D-01o Sul - Consumo de Cimento (t),Mato Grosso - Consumo de Cimento (t),Mato Grosso Do Sul - Consumo de Cimento (t),Goiás - Consumo de Cimento (t),Distrito Federal - Consumo de Cimento (t)
0,2003-01,13.867,3.260,28.193,6.920,84.701,10.392,17.555,43.342,21.844,...,48.426,254.362,782.111,189.804,142.456,188.093,47.470,33.744,97.447,50.047
1,2003-02,10.777,2.191,31.226,7.476,80.221,6.857,14.976,28.663,15.814,...,54.969,252.842,789.891,174.713,135.829,170.973,45.387,32.230,97.256,48.110
2,2003-03,13.888,2.154,33.484,8.659,76.122,7.011,13.334,27.900,16.067,...,53.847,229.385,774.309,180.801,133.135,174.880,44.907,33.559,101.442,49.006
3,2003-04,11.859,2.643,31.596,7.259,76.544,7.122,15.173,25.739,14.938,...,50.375,243.407,782.071,180.172,144.723,178.111,45.467,34.283,98.217,48.445
4,2003-05,14.431,2.546,32.919,6.786,83.445,5.267,18.728,30.203,17.765,...,52.576,256.535,841.400,185.597,150.523,184.331,56.246,36.066,112.540,51.436
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2022-08,49.066,21.748,66.077,9.133,156.442,13.208,54.154,102.698,77.776,...,100.170,241.702,1112.914,412.177,377.545,288.217,192.533,103.854,274.673,77.448
236,2022-09,49.850,20.032,58.590,10.622,145.777,13.476,55.664,98.133,72.377,...,85.832,221.036,1028.144,337.056,370.282,272.178,183.895,95.890,247.240,64.321
237,2022-10,43.733,18.573,58.793,10.300,139.233,11.236,52.320,94.964,71.791,...,91.318,224.399,996.449,340.018,340.900,272.665,180.525,89.679,246.647,72.839
238,2022-11,40.078,15.493,53.509,10.258,142.025,13.549,43.036,97.684,71.015,...,77.187,221.319,1024.272,383.477,363.900,275.972,161.683,99.356,225.813,59.598


In [8]:
df = read_csv_replace_missing_by_linear("consumo_cimento_mensal.csv",0)
df

Unnamed: 0,Ano,Rondônia - Consumo de Cimento (t),Acre - Consumo de Cimento (t),Amazonas - Consumo de Cimento (t),Roraima - Consumo de Cimento (t),Pará - Consumo de Cimento (t),Amapá - Consumo de Cimento (t),Totins-01 - Consumo de Cimento (t),Mahão -01- Consumo de Cimento (t),Piauí - Consumo de Cimento (t),...,Espírito to --01 Consumo de Cimento (t),Rio De eiro-01 - Consumo de Cimento (t),São Paulo - Consumo de Cimento (t),Paá - -01Consumo de Cimento (t),ta C-01atarina - Consumo de Cimento (t),Rio Gde D-01o Sul - Consumo de Cimento (t),Mato Grosso - Consumo de Cimento (t),Mato Grosso Do Sul - Consumo de Cimento (t),Goiás - Consumo de Cimento (t),Distrito Federal - Consumo de Cimento (t)
0,2003-01,16.624040,4.302080,41.817000,4.679440,94.433520,6.994400,26.662520,42.314200,21.414200,...,53.132080,254.416040,773.305520,183.618120,142.484560,175.525120,57.650200,36.692280,117.948200,58.774040
1,2003-02,16.699271,4.378311,42.886692,4.494748,94.646135,6.778631,26.963289,42.178354,21.334162,...,53.330234,253.751848,771.884097,183.581082,142.255291,174.198197,57.722354,36.689895,118.631392,58.974040
2,2003-03,16.862900,4.481052,43.188544,4.324377,94.930432,6.813668,27.357067,42.781095,21.579754,...,53.534382,254.393366,771.836949,184.527007,142.958180,174.021271,58.162724,36.983192,119.574540,59.633225
3,2003-04,16.919722,4.613194,44.111508,4.125520,95.650110,6.720418,27.953817,43.272559,21.724111,...,53.926739,255.627509,772.403377,184.816829,143.438430,173.140771,58.768939,37.184835,120.603826,60.289689
4,2003-05,17.149963,4.725229,44.429439,3.967762,96.379145,6.628625,28.420334,44.017766,22.046353,...,54.398739,256.676406,772.734136,184.983105,143.604430,172.316943,59.443870,37.343938,122.019378,60.989172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2022-08,41.337383,16.108290,52.356976,11.042684,131.049213,12.294396,44.720367,88.758829,67.733413,...,90.952285,229.935633,992.981932,370.746854,332.043235,260.314382,150.857735,91.006952,226.840413,63.035597
236,2022-09,41.108440,15.942849,51.936240,11.095004,130.349451,12.271509,44.458014,88.396036,67.432495,...,90.483430,229.385996,989.751038,369.392962,331.119239,259.543988,149.839283,90.678777,225.445995,62.604339
237,2022-10,40.826223,15.821175,51.744874,11.098552,129.962422,12.235404,44.108559,88.161584,67.297460,...,90.513564,229.571144,989.251249,370.672942,330.291977,259.266144,148.993306,90.604760,224.888230,62.608159
238,2022-11,40.749675,15.739521,51.526987,11.117510,129.785814,12.282449,43.841869,88.023393,67.174903,...,90.354063,229.683489,989.880095,371.987220,330.446639,258.916078,148.147795,90.758502,224.266151,62.270560


In [9]:
df_year = df.copy()
df_year['Ano'] = df_year['Ano'].str[:4]
df_year = df_year.groupby('Ano').sum()
df_year

Unnamed: 0_level_0,Rondônia - Consumo de Cimento (t),Acre - Consumo de Cimento (t),Amazonas - Consumo de Cimento (t),Roraima - Consumo de Cimento (t),Pará - Consumo de Cimento (t),Amapá - Consumo de Cimento (t),Totins-01 - Consumo de Cimento (t),Mahão -01- Consumo de Cimento (t),Piauí - Consumo de Cimento (t),Ceará - Consumo de Cimento (t),...,Espírito to --01 Consumo de Cimento (t),Rio De eiro-01 - Consumo de Cimento (t),São Paulo - Consumo de Cimento (t),Paá - -01Consumo de Cimento (t),ta C-01atarina - Consumo de Cimento (t),Rio Gde D-01o Sul - Consumo de Cimento (t),Mato Grosso - Consumo de Cimento (t),Mato Grosso Do Sul - Consumo de Cimento (t),Goiás - Consumo de Cimento (t),Distrito Federal - Consumo de Cimento (t)
Ano,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2003,209.036765,59.16131,544.335207,46.397113,1171.992803,83.360402,353.907266,532.363,266.447557,931.519786,...,661.241285,3100.87336,9301.317106,2221.074359,1733.62127,2065.344812,712.57219,451.455488,1490.959036,746.902092
2004,224.027837,74.185071,593.52251,37.151309,1206.334294,93.577132,388.352068,569.094042,277.977865,964.983452,...,698.202495,3257.728874,9633.39841,2227.858556,1788.621868,2046.07777,721.335469,472.179483,1620.231454,798.414683
2005,252.009974,90.209692,612.519281,35.852102,1209.499542,108.129651,378.112415,628.068693,303.245957,1007.058444,...,734.887914,3484.300068,10250.415021,2221.286932,1840.568001,2078.213749,730.347503,499.688717,1695.226775,786.639325
2006,299.923013,105.40333,645.48711,38.614944,1269.588842,116.400106,388.002719,728.911155,346.881786,1096.03753,...,853.453551,3776.373472,11124.047417,2370.41673,2012.344496,2217.497007,790.393741,580.966157,1836.297394,798.885985
2007,343.819931,112.276339,688.412262,46.688434,1348.417755,115.556426,425.778702,843.594469,399.308797,1205.595564,...,1011.061631,3970.925951,12065.597906,2632.843543,2281.137447,2415.24761,877.993897,664.371951,2036.871526,855.369832
2008,445.399186,121.842155,724.865871,57.872081,1411.679402,119.418334,445.562708,998.19395,473.516085,1359.23468,...,1096.749511,4011.227971,12867.516853,2930.980554,2485.626551,2594.594772,949.605832,702.650278,2221.343627,940.621935
2009,650.311793,142.649491,771.754721,72.857115,1520.851767,132.943916,456.120033,1142.770248,544.333109,1530.741964,...,1157.138788,4088.93867,13565.255083,3318.923494,2669.972051,2825.592326,1019.064888,754.211312,2427.338342,1053.091073
2010,817.533695,159.327478,851.665897,86.211365,1696.977667,157.78431,468.374607,1250.071428,592.265467,1655.285241,...,1223.150372,4279.758386,14227.502615,3752.156964,2888.522956,3105.792503,1122.410356,840.96368,2661.187728,1148.940358
2011,865.041656,168.591603,926.486757,95.123549,1882.505886,198.200411,484.68132,1367.694301,640.00662,1770.106432,...,1283.111756,4487.931264,14649.570863,4096.935945,3082.060239,3285.972711,1262.144076,918.411302,2855.319348,1187.797709
2012,831.11274,178.515557,940.03585,104.996274,2041.167203,225.197351,490.774809,1441.393721,699.656491,1897.376437,...,1316.547967,4624.568854,14711.856204,4295.099312,3219.872997,3350.77814,1394.22029,966.776381,2974.147274,1160.19075


In [10]:
df_year.index = df_year.index.astype(int)
df_2014 = df_year[df_year.index >= 2014] 
df_2014

Unnamed: 0_level_0,Rondônia - Consumo de Cimento (t),Acre - Consumo de Cimento (t),Amazonas - Consumo de Cimento (t),Roraima - Consumo de Cimento (t),Pará - Consumo de Cimento (t),Amapá - Consumo de Cimento (t),Totins-01 - Consumo de Cimento (t),Mahão -01- Consumo de Cimento (t),Piauí - Consumo de Cimento (t),Ceará - Consumo de Cimento (t),...,Espírito to --01 Consumo de Cimento (t),Rio De eiro-01 - Consumo de Cimento (t),São Paulo - Consumo de Cimento (t),Paá - -01Consumo de Cimento (t),ta C-01atarina - Consumo de Cimento (t),Rio Gde D-01o Sul - Consumo de Cimento (t),Mato Grosso - Consumo de Cimento (t),Mato Grosso Do Sul - Consumo de Cimento (t),Goiás - Consumo de Cimento (t),Distrito Federal - Consumo de Cimento (t)
Ano,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014,620.051284,162.799156,813.628529,111.926945,1947.679341,209.55864,465.273816,1285.521689,765.615104,1995.150589,...,1275.992025,4032.197243,12886.038802,4197.799347,3110.071553,3123.976055,1314.521547,962.145964,2700.677734,936.22619
2015,526.124499,151.609123,771.077176,108.281061,1823.114075,186.338809,475.984531,1185.770587,796.302369,2035.067488,...,1231.477484,3430.839264,11567.240507,4044.486542,2976.994028,2949.933548,1224.515882,933.827844,2486.499407,796.220711
2016,458.91108,148.170166,685.705477,101.325608,1669.771476,161.580073,470.335642,1055.960646,782.64464,1995.169214,...,1178.392127,2914.000158,10720.896592,3913.920347,2926.272613,2825.309386,1215.412396,902.535242,2365.569752,695.360429
2017,427.957925,149.734482,582.457626,100.115903,1531.211372,143.077336,445.588486,929.470907,733.329643,1900.201457,...,1163.778513,2708.531876,10463.116711,3850.586338,2987.70796,2770.911175,1279.45739,891.36091,2339.132479,648.26749
2018,425.379572,153.625376,543.459736,110.462981,1446.347354,133.932047,432.039052,882.423098,716.163632,1818.34787,...,1152.280635,2726.012612,10502.711676,3864.298098,3116.979073,2769.980561,1355.958836,909.000637,2340.657615,635.951422
2019,448.430639,164.722314,571.196455,126.815921,1441.159028,136.450109,463.089358,938.794845,754.086209,1866.679451,...,1144.579701,2798.711771,10895.72523,4074.900979,3368.434268,2887.77116,1475.848294,965.463487,2439.018856,668.111557
2020,475.488423,178.233927,595.091505,136.280857,1490.468002,142.871074,503.17808,1008.049482,785.294032,1957.765625,...,1135.926393,2810.319736,11454.689978,4352.580751,3694.456704,3024.036729,1622.44113,1033.355576,2582.285931,716.733633
2021,487.965311,187.872932,611.96791,135.101083,1537.76355,145.99644,522.705753,1040.991019,799.185004,1992.152578,...,1101.100782,2767.098316,11805.901403,4455.256354,3912.594046,3083.190452,1745.453251,1073.866272,2676.365067,747.129704
2022,493.255992,191.530787,623.138977,133.078108,1560.895294,146.677638,533.356322,1058.115706,807.597085,2005.033689,...,1088.761796,2759.930485,11924.034023,4458.500858,3975.872356,3103.90802,1798.169562,1093.1183,2712.065368,754.87569


In [11]:
for column in df_2014.columns:
    file_name = f"2014_01_cons_cim_{column}.csv"
    column_df = df_2014[[column]]
    column_df.to_csv(file_name, index=True)