In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import os
import re
import pickle as pk
from scipy.signal import hilbert
from statsmodels.tsa.seasonal import seasonal_decompose

In [2]:
def preprocess(index, data_dir='D:/Facultad/Tesis/Data_Disgregada'):
    """Load one disaggregated series and return its smoothed hourly activity.

    Reads ``{data_dir}/Lkvec_id{index}.csv``, which must provide the columns
    ``time`` (formatted ``%Y-%m-%d %H:%M:%S``), ``media`` (rows tagged ``'B'``
    and ``'M'`` are used) and ``frequency``.

    Parameters
    ----------
    index : int or str
        Identifier interpolated into the file name.
    data_dir : str, optional
        Folder containing the ``Lkvec_id*.csv`` files. Defaults to the
        location the notebook was originally written against.

    Returns
    -------
    pandas.DataFrame
        Columns ``media`` and ``blog``, indexed by hour, restricted to the
        open interval (peak - 3 days, peak + 21 days) around the hour with
        the largest combined count, and smoothed with a centered 1-day
        rolling mean.
    """
    filename = f'{data_dir}/Lkvec_id{index}.csv'
    data = pd.read_csv(filename)

    # Use the timestamp as the index so the rows can be resampled by time.
    data['time'] = pd.to_datetime(data['time'], format="%Y-%m-%d %H:%M:%S")
    data = data.set_index('time')

    # Pick 'frequency' before resampling so only that column is summed (the
    # string 'media' column is left out), and use the lowercase 'h' alias:
    # the uppercase 'H' spelling is deprecated in recent pandas releases.
    blog = data.loc[data['media'] == 'B', 'frequency'].resample('1h').sum()
    media = data.loc[data['media'] == 'M', 'frequency'].resample('1h').sum()

    # Align both series on a common hourly index; hours present in only one
    # of them count as zero activity in the other.
    new_index = blog.index.union(media.index)
    all_data = pd.DataFrame(
        {
            'media': media.reindex(new_index).fillna(0.00),
            'blog': blog.reindex(new_index).fillna(0.00),
        },
        index=new_index,
    )

    # Keep a window from 3 days before the global peak to 21 days after it
    # (both ends exclusive). The peak is located once and reused.
    peak_time = all_data.sum(axis=1).idxmax()
    window_start = peak_time - pd.to_timedelta('3D')
    window_end = peak_time + pd.to_timedelta('21D')
    in_window = (all_data.index > window_start) & (all_data.index < window_end)
    resample_dropped = all_data[in_window]

    # Centered 24-hour rolling mean to extract the trend.
    rolling_dropped = resample_dropped.rolling('1D', center=True).mean()
    return rolling_dropped

def load_files(index, n_hours=168):
    """Return the normalized series for ``index``, starting at the media peak.

    Parameters
    ----------
    index : int or str
        Series identifier, forwarded to ``preprocess``.
    n_hours : int, optional
        Maximum number of rows to keep, counted from the row where the
        normalized ``media`` column is largest. Defaults to 168.

    Returns
    -------
    pandas.DataFrame
        At most ``n_hours`` rows with columns ``media`` and ``blog``, each
        divided by its own maximum over the preprocessed window.
    """
    data = preprocess(index)

    # Scale each column by its own maximum so both peak at 1.
    data = data.assign(
        media=data.media.div(data.media.max()),
        blog=data.blog.div(data.blog.max()),
    )

    # Change here to fit only part of the data: the slice starts at the
    # position of the media maximum. `.iloc` makes the positional intent
    # explicit on a datetime-indexed frame.
    init_index = data.media.argmax()
    return data.iloc[init_index:init_index + n_hours]

In [3]:
def make_figure(data, index, out_dir='d:/Git_Proyects/Tesis/Lkvec_series/imagenes_lkvec'):
    """Plot the media and blog series of one file and save the figure as PDF.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``media`` and ``blog`` columns; values are plotted against
        their row position.
    index : int or str
        Identifier used in the output file name ``serie_{index}.pdf``.
    out_dir : str, optional
        Folder the PDF is written to; created if it does not exist. Defaults
        to the location the notebook originally saved to.
    """
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(8, 5), dpi=100)
    try:
        ax.plot(data.media.to_numpy(), '.-', color='red', linewidth=2, label='Media data', alpha=1)
        ax.plot(data.blog.to_numpy(), '.-', color='blue', linewidth=2, label='Blogs data', alpha=1)

        ax.set_ylim([0.00, 1.05])
        ax.set_xlim([0, data.shape[0]])
        # 0.7 is the alpha the original figure ended up with: its last grid
        # call overrode the earlier 0.15 setting.
        ax.grid(True, alpha=0.7)
        ax.tick_params(axis='both', labelsize=13)
        ax.set_xlabel('Tiempo [horas]', fontsize=14)
        ax.legend()

        # Run the layout pass only after the legend and axis label exist, so
        # they are taken into account and not clipped in the saved file.
        fig.tight_layout()

        os.makedirs(out_dir, exist_ok=True)
        fig.savefig(f'{out_dir}/serie_{index}.pdf')
    finally:
        # Always release the figure; this function is called in a loop.
        plt.close(fig)

In [4]:
# Project folder holding the classification table that lists the series ids.
path = 'd:/Git_Proyects/Tesis/Lkvec_series/'
l_data = pd.read_csv(path+'Clasificacion_series.csv')
l_id = l_data['fileid']

# Render one figure per listed series. The loop variable is `file_id` rather
# than `id` so the builtin id() is not shadowed for the rest of the notebook.
for file_id in l_id:
    data = load_files(file_id)
    make_figure(data, file_id)