<a href="https://colab.research.google.com/github/OMK-Kostroma/Elasticity/blob/main/%D0%B0%D0%BD%D0%B0%D0%BB%D0%B8%D0%B7_%D1%87%D0%B5%D0%BA%D0%BE%D0%B2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import auth
import gspread
from google.auth import default
import pandas as pd

class DataGetter:
  """Load the source tables of the analysis from a Google spreadsheet.

  Constructing an instance authenticates the Colab user and opens the
  spreadsheet. Each ``get_*`` method then reads one worksheet into a
  pandas DataFrame whose column names come from the worksheet's first row.
  Cell values are stored exactly as gspread returns them; the only
  conversion done here is the ``DATE`` column in ``get_date_chars``.
  """

  # Spreadsheet opened when the constructor is called without a URL.
  SPREADSHEET_URL = 'https://docs.google.com/spreadsheets/d/1K8kXxyLmd6IgcRtLi8QoAwoNLXvYiPkFnDE3jdJILYg/'

  def __init__(self, url=None):
    """Authenticate and open the spreadsheet.

    Args:
      url: URL of the spreadsheet to open. ``None`` (the default) opens
        ``SPREADSHEET_URL``.
    """
    self.url = self.SPREADSHEET_URL if url is None else url
    self.gsheets = self.get_gsheets()

  def get_gsheets(self):
    """Authenticate the Colab user and return the spreadsheet at ``self.url``."""
    auth.authenticate_user()
    creds, _ = default()
    gc = gspread.authorize(creds)
    return gc.open_by_url(self.url)

  def _read_worksheet(self, name):
    """Return worksheet ``name`` as a DataFrame, first row used as header.

    Raises:
      IndexError: if the worksheet returns no rows at all, so there is no
        header row to take the column names from.
    """
    values = self.gsheets.worksheet(name).get_all_values()
    if not values:
      # Same exception type as indexing the empty list would give, but the
      # message says which worksheet is the problem.
      raise IndexError(
          f"worksheet '{name}' returned no rows; a header row is required")
    header, rows = values[0], values[1:]
    return pd.DataFrame(rows, columns=header)

  def get_raw(self):
    """Return the ``raw`` worksheet as a DataFrame."""
    return self._read_worksheet('raw')

  def get_date_chars(self):
    """Return the ``date_characteristics`` worksheet as a DataFrame.

    The ``DATE`` column is parsed with the ``%d.%m.%Y`` format and stored
    as ``datetime.date`` objects.
    """
    date_chars = self._read_worksheet('date_characteristics')
    parsed = pd.to_datetime(date_chars['DATE'], format="%d.%m.%Y")
    date_chars['DATE'] = parsed.dt.date
    return date_chars

  def get_categories(self):
    """Return the ``categories_map`` worksheet as a DataFrame."""
    return self._read_worksheet('categories_map')

  def get_raw_data(self):
    """Return the tuple ``(raw, date_chars, categories)`` of DataFrames."""
    return self.get_raw(), self.get_date_chars(), self.get_categories()

class Preparator:
  """Build the analysis DataFrame from the raw rows and two lookup tables.

  Args:
    raw: DataFrame with at least the columns ``DATE``, ``SKU``, ``CHECK``,
      ``PRICE``, ``COUNT`` and ``REVENUE``; ``DATE`` holds strings in the
      ``%d.%m.%Y %H:%M:%S`` format.
    date_chars: DataFrame joined on ``DATE``.
    categories: DataFrame joined on ``SKU``.
  """

  def __init__(self,raw, date_chars, categories):
    self.raw=raw
    self.date_chars=date_chars
    self.categories=categories

  def get_base_of_full_data(self):
    """Return the six base columns of ``raw`` with the timestamp split up.

    ``DATE`` is replaced by its calendar date (``datetime.date`` objects),
    and ``HOUR`` and ``MINUTE`` columns are appended. ``self.raw`` itself is
    left untouched.
    """
    base_columns = ['DATE','SKU', 'CHECK', 'PRICE', 'COUNT', 'REVENUE']
    # Select first, then copy: only the kept columns are duplicated, and the
    # result is independent of self.raw.
    full_data = self.raw[base_columns].copy()
    # Parse the timestamp once and derive every part from the same Series.
    timestamps = pd.to_datetime(full_data['DATE'], format="%d.%m.%Y %H:%M:%S")
    full_data['DATE'] = timestamps.dt.date
    full_data['HOUR'] = timestamps.dt.hour
    full_data['MINUTE'] = timestamps.dt.minute
    return full_data

  def get_really_full_data(self,full_data):
    """Left-join ``date_chars`` on ``DATE``, then ``categories`` on ``SKU``.

    Rows of ``full_data`` without a match keep missing values in the joined
    columns.
    """
    with_dates = pd.merge(left=full_data, right=self.date_chars, on='DATE', how='left')
    return pd.merge(left=with_dates, right=self.categories, on='SKU', how='left')

  def get_df(self):
    """Run both preparation steps and return the merged DataFrame."""
    return self.get_really_full_data(self.get_base_of_full_data())

In [3]:
# Pipeline driver: load the three source tables and build the merged frame.
# Creating DataGetter authenticates the Colab user and opens the spreadsheet.
getter=DataGetter()
# Three DataFrames: the raw rows, the per-date table, and the SKU categories.
raw, date_chars, categories=getter.get_raw_data()
prep=Preparator(raw, date_chars, categories)
# df = base columns of raw (plus HOUR/MINUTE) left-joined with date_chars on
# DATE and with categories on SKU.
df=prep.get_df()

In [5]:
# Export cell: write a fixed selection of df's columns to a pickle file.
import pickle
# NOTE(review): the path is under /content/drive; mounting Google Drive is not
# visible in this part of the notebook — confirm it happens before this cell.
path=r'/content/drive/MyDrive/ОМК/store_analyzing/data/full_raw_df.pkl'
# Column selection also fixes the column order of the exported frame.
# HOUR and MINUTE are added by Preparator.get_base_of_full_data; the columns
# that are not in the raw base set presumably come from the
# date_characteristics and categories_map worksheets — verify against the sheets.
temp=df[['ID1', 'ID2', 'ID3','SKU','CHECK',
      'DATE','HOUR', 'MINUTE',
      'DOW', 'DOM', 'DOY', 'WOM', 'WOY', 'MOY',
      'DAY_TYPE', 'HOLIDAY_ID', 'IS_HOLIDAY',
      'Temperature', 'Atmospheric_pressure', 'Humidity', 'Wind Speed', 'Nebulosity', 'WEATHER',
      'PRICE',  'REVENUE', 'COUNT']]
# Binary mode is required by pickle; the with-block closes the file afterwards.
with open(path,'wb') as f:
  pickle.dump(temp,f)



