## Solar Data Files Reformatting

###### Convert CSV files to Excel files (not needed for now, because we want to work with the .csv files first)

In [None]:
import pandas as pd
import os
# change csv files to excel files:
def csv_to_excel(filespath, output_dir=None):
    """Convert every CSV file directly inside *filespath* to an .xlsx workbook.

    Args:
        filespath: Directory to scan (sub-directories are not visited).
        output_dir: Directory that receives the workbooks; it is created if
            it does not exist.  ``None`` (default) writes into the current
            working directory.

    Each workbook keeps the base name of its CSV file.  ``to_excel`` is called
    with its defaults, so the DataFrame index is written as the first column.
    The source CSV files are left in place.
    """
    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)

    suffix = ".csv"
    for filename in sorted(os.listdir(filespath)):  # os.listdir returns bare names
        file_path = os.path.join(filespath, filename)
        # Match the extension case-insensitively and ignore folders that
        # merely happen to be named like a CSV file.
        if not filename.lower().endswith(suffix) or not os.path.isfile(file_path):
            continue

        workbook_name = filename[:-len(suffix)] + ".xlsx"
        if output_dir is not None:
            workbook_name = os.path.join(output_dir, workbook_name)

        pd.read_csv(file_path).to_excel(workbook_name)
        # os.remove(file_path)  # uncomment to delete the .csv after conversion

##### Separate/add columns

In [12]:
import os
from datetime import date
import time
import pandas as pd


def shape_df(file): # input a file
    """Load one raw solar export and reshape it into tidy, sorted columns.

    Args:
        file: Path of the input file.  A path ending in ``.csv`` (any case) is
            read with ``pandas.read_csv``; anything else is passed to
            ``pandas.read_excel``.

    The input must contain the columns ``IMPORTID`` (a ``'_'``-separated name
    such as ``'ZoneA_GHI_AWS'``) and ``DT`` (a timestamp).

    Returns:
        A DataFrame in which ``IMPORTID`` is replaced by ``Zone`` (last
        character of the first name part, e.g. ``'A'``) and ``Type`` (second
        name part, e.g. ``'GHI'``), and ``DT`` is replaced by ``Date``,
        ``Year``, ``Month``, ``Day`` and ``Time``.  Rows are sorted by Zone,
        Type, Date, Time and re-indexed from 0.
    """
    if str(file).lower().endswith('.csv'):
        df = pd.read_csv(file)
    else:
        df = pd.read_excel(file)

    # Drop the 'Unnamed: N' columns that appear when a saved index is read
    # back; copy so the column assignments below act on an independent frame.
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')].copy()

    # Split the name on '_' into separate columns,
    # e.g. 'ZoneA_GHI_AWS' -> 'ZoneA', 'GHI', 'AWS'.
    name_parts = df['IMPORTID'].str.split(pat='_', expand=True)
    df['Zone'] = name_parts[0].str.strip().str[-1]  # zone letter only, e.g. 'A'
    df['Type'] = name_parts[1]                      # reading type, e.g. 'GHI'
    del df['IMPORTID']

    # Break the timestamp into its calendar components.
    stamps = pd.to_datetime(df['DT'])
    df['Date'] = stamps.dt.date
    df['Year'] = stamps.dt.year
    df['Month'] = stamps.dt.month
    df['Day'] = stamps.dt.day
    df['Time'] = stamps.dt.time
    del df['DT']

    return df.sort_values(by=['Zone', 'Type', 'Date', 'Time']).reset_index(drop=True)

#### add missing rows

    Zones A to K: missing GHI, POA20, POA30 from 2017/01/01 to 2017/06/30
    Zone Z: missing BTM, GHI, POA20, POA30 from 2017/01/01 to 2017/06/30

In [14]:
def add_missing(df,
                zones=('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K'),
                types=('GHI', 'POA20', 'POA30')):
    """Append empty 15-minute rows for the first half of 2017.

    If *df* contains at least one row dated before 2017-07-01, one row per
    15-minute step from 2017-01-01 00:00 to 2017-06-30 23:45 is appended for
    every (zone, type) combination.  Only Date, Year, Month, Day, Time, Zone
    and Type are filled in; every other column is left missing on those rows.
    Otherwise no rows are added.

    Args:
        df: DataFrame with at least the columns Date (``datetime.date``
            values), Zone, Type and Time.
        zones: Zone letters to create rows for (default A-K).
        types: Reading types to create rows for (default GHI, POA20, POA30).

    Returns:
        A new DataFrame sorted by Zone, Type, Date, Time and re-indexed from 0.
        *df* itself is not modified.
    """
    first_half_2017 = df.loc[df['Date'] < date(2017, 7, 1)]

    if not first_half_2017.empty:
        # The timestamp grid is identical for every zone/type, so build it once.
        idx = pd.date_range(start='2017-01-01', end='2017-06-30 23:45:00', freq='15min')
        grid = pd.DataFrame({
            'Date': idx.date,
            'Year': idx.year,
            'Month': idx.month,
            'Day': idx.day,
            'Time': idx.time,
        })
        fillers = [grid.assign(Zone=zone, Type=reading_type)
                   for zone in zones
                   for reading_type in types]
        # DataFrame.append was removed in pandas 2.0; a single concat is the
        # supported equivalent and avoids re-copying df once per combination.
        df = pd.concat([df, *fillers], ignore_index=True)

    return df.sort_values(by=['Zone', 'Type', 'Date', 'Time']).reset_index(drop=True)

#### Separate/add columns by type, and split the output into one file per zone

In [25]:
import os

def seperate_types_files(file, files_dir, *, include_source_name=False, fill_missing=False):
    """Write one wide-format workbook per zone found in *file*.

    The file is loaded with ``shape_df``.  For every zone, the long
    Type/READING rows are turned into one row per (Date, Time) with one
    reading column per type, and the result is saved in *files_dir* on a
    sheet named ``'sheet1'``.

    Args:
        file: Path of the input file handed to ``shape_df``.
        files_dir: Existing directory that receives the workbooks.  The
            process working directory is left untouched, so a relative
            *file* path keeps resolving correctly.
        include_source_name: If true, name each workbook
            ``<zone>_<input base name>.xlsx`` instead of ``<zone>.xlsx``.
            With the default, processing several inputs that share a zone
            overwrites the earlier workbook.
        fill_missing: If true, run ``add_missing`` on the data before it is
            split up.

    Output columns are the non-reading columns of the shaped data (Zone,
    Date, Year, Month, Day, Time), then ``HrFlag`` (1 for hours 6-19
    inclusive, else 0), then one column per type in sorted order.
    """
    source_name = os.path.splitext(os.path.basename(file))[0]

    df = shape_df(file)
    if fill_missing:
        df = add_missing(df)

    # Hour flag: 1 when the hour of day is between 6 and 19 inclusive.
    df['HrFlag'] = df['Time'].map(lambda t: 1 if 6 <= t.hour <= 19 else 0)

    zone_names = sorted(df['Zone'].dropna().unique())
    type_names = sorted(df['Type'].dropna().unique())
    stamp_keys = ['Date', 'Time']

    for zone in zone_names:
        zone_rows = df[df['Zone'] == zone]

        # One row per timestamp, carrying every column except the long-format
        # Type/READING pair.
        final = (zone_rows.drop_duplicates(stamp_keys)
                 .drop(columns=['Type', 'READING'])
                 .sort_values(stamp_keys)
                 .reset_index(drop=True))

        # Attach each type's readings by timestamp rather than by row
        # position, so a duplicated or missing timestamp in one type cannot
        # shift the other readings onto the wrong rows.
        for type_name in type_names:
            readings = (zone_rows.loc[zone_rows['Type'] == type_name,
                                      stamp_keys + ['READING']]
                        .drop_duplicates(stamp_keys)
                        .rename(columns={'READING': type_name}))
            final = final.merge(readings, on=stamp_keys, how='left')

        if include_source_name:
            workbook_name = "%s_%s.xlsx" % (zone, source_name)
        else:
            workbook_name = "%s.xlsx" % (zone)

        final.to_excel(os.path.join(files_dir, workbook_name),
                       sheet_name='sheet1', index=False)

#### deal with all files

In [26]:
files_dir_old = 'C:/Users/zhongj/Desktop/New folder' # input folder: change the directory

files_dir_new = "C:/Users/zhongj/Desktop/New folder3" # output folder: change the directory

# NOTE: by default seperate_types_files names each workbook after the zone
# only, so when several inputs contain the same zone the file processed last
# wins.  Sorting makes that order reproducible.
for filename in sorted(os.listdir(files_dir_old)):
    file_path = os.path.join(files_dir_old, filename)
    # Skip sub-folders and the '~$...' lock files Excel keeps next to an open
    # workbook; neither can be read as data.
    if filename.startswith('~$') or not os.path.isfile(file_path):
        continue
    seperate_types_files(file_path, files_dir_new)


#### Combine files 
(I didn't use this code: when the files are combined directly in Excel the date format can be changed, which this code cannot do.)

In [None]:
# Folder scanned for the workbooks to combine (edit for your machine).
files_dir_old = 'C:/Users/zhongj/Desktop/SolarData_Edit'
# Folder that receives the combined workbooks (edit for your machine).
files_dir_new = "C:/Users/zhongj/Desktop/Zonal_SolarData"

import xlrd
import xlsxwriter

# One file-name tag per zone; an input workbook is assigned to a zone when its
# file name contains that zone's tag.
Zones = [zone_letter + '_Solar_Data' for zone_letter in 'ABCDEFGHIJKS']

In [None]:
# Combine, for each zone, every workbook in files_dir_old whose file name
# contains the zone tag into a single workbook in files_dir_new.
# NOTE(review): xlrd 2.0+ reads only legacy .xls workbooks; confirm the
# installed version if the inputs are .xlsx.
for zone in Zones:
    # Start from an empty list for every zone; otherwise each workbook would
    # also contain the rows of all zones processed before it.
    data = []

    # Sorted so the rows are combined in a reproducible file order.
    for filename in sorted(os.listdir(files_dir_old)):
        if zone not in filename:
            continue

        wb = xlrd.open_workbook(os.path.join(files_dir_old, filename))  # open excel file
        sheet = wb.sheets()[0]  # only the first sheet is read
        rows = [sheet.row_values(rownum) for rownum in range(sheet.nrows)]

        # When a later file starts with the same row as the combined data
        # (a repeated header row), keep only the first copy.
        if data and rows and rows[0] == data[0]:
            rows = rows[1:]
        data.extend(rows)

    # Write through an explicit path instead of changing the working directory.
    workbook = xlsxwriter.Workbook(os.path.join(files_dir_new, r"%s.xlsx" % (zone)))
    worksheet = workbook.add_worksheet()
    # font = workbook.add_format({'font_size':12})

    # Write the collected rows cell by cell.
    for i, row in enumerate(data):
        for j, value in enumerate(row):
            worksheet.write(i, j, value)
    workbook.close()