# Reading the downloaded files

# Table of Contents
* [1. Settings](#1.-Settings)
	* [1.1 Libraries](#1.1-Libraries)
* [2. Parameters for individual data sources](#2.-Parameters-for-individual-data-sources)
	* [2.1 '50Hertz](#2.1-'50Hertz)
	* [2.2 Amprion](#2.2-Amprion)
	* [2.3 TenneT](#2.3-TenneT)
	* [2.4 TransnetBW](#2.4-TransnetBW)
* [3. Reading files one by one](#3.-Reading-files-one-by-one)
	* [3.1 Create an empty DataFrame](#3.1-Create-an-empty-DataFrame)
	* [3.2 Apply the processing function one-by-one](#3.2-Apply-the-processing-function-one-by-one)


# 1. Settings

## 1.1 Libraries

Loading some python libraries.

In [42]:
from datetime import datetime, date
import pytz
import yaml
import os
import numpy as np
import pandas as pd
import logging

Set up a log.

In [43]:
# Logger named 'log', used by the reader functions and the main loop below.
logger = logging.getLogger('log')
# Let records of level INFO and above through this logger.
logger.setLevel('INFO')

# 2. Parameters for individual data sources

This section contains a Python dictionary indicating which data sources there are and which data types they provide.

In [46]:
# Data sources (keys) and the list of data types each one provides (values),
# written as YAML and parsed into a dict below.
conf = """
ENTSO-E: [Data_Portal]
50Hertz: [wind, pv]
Amprion: [wind, pv]
TenneT: [wind, pv]
TransnetBW: [wind, pv]
"""
# safe_load only builds plain Python objects.  yaml.load() without an explicit
# Loader is deprecated in PyYAML 5.1 and raises a TypeError in PyYAML 6.
conf = yaml.safe_load(conf)

In [48]:
def read_entso(filepath, tech):
    """Read an ENTSO-E Excel export and return it as an hourly table.

    Parameters
    ----------
    filepath : str
        Path of the Excel file to read.
    tech : str
        Not used by this reader; accepted so that all readers can be called
        with the same arguments.

    Returns
    -------
    pandas.DataFrame
        One column per country, named ``load_<country>``, indexed by
        timestamps localized to 'Europe/Berlin'.  'n.a.' entries are
        replaced by NaN.
    """
    # The former ``parse_cols=None`` argument is left out: it only spelled out
    # the default (parse all columns) and the keyword no longer exists in
    # current pandas versions.
    data = pd.read_excel(
        io=filepath,
        header=9,
        index_col=[0, 1],
        )

    # Naive timestamps at 02:00 on every dst-transition date.
    # NOTE(review): _utc_transition_times is a private pytz attribute and may
    # change without notice.
    dst_transition_times = [d.replace(hour=2) for d in pytz.timezone(
            'Europe/Berlin')._utc_transition_times[1:]]

    # The original data has days and countries in the rows and hours in the
    # columns.  This rearranges the table, mapping hours on the rows and
    # countries on the columns.
    data = data.stack(level=None).unstack(level='Country').reset_index()
    # The information on the hour of the day a datapoint belongs to has been
    # put in a new column named 'level_1' by the DataFrame.stack() method.
    data.rename(columns={'level_1': 'raw_hour'}, inplace=True)

    # Truncate the hours column after 2 characters and remove the letters
    # which are there to indicate the order during fall dst-transition.
    data['hour'] = data['raw_hour'].str[:2].str.replace('A','').str.replace('B','')
    # Hours are indexed 1-24 by ENTSO-E, but pandas requires 0-23, so we deduct 1.
    data['hour'] = (data['hour'].astype(int) - 1).astype(str)

    data['dt_index'] = pd.to_datetime(data['Day']+' '+data['hour']+':00')
    data.set_index('dt_index', inplace=True)

    # Drop 2nd occurrence of 03:00 appearing in October data except for autumn
    # dst-transition
    data = data[~((data['raw_hour'] == '3B:00:00') & ~(
                data.index.isin(dst_transition_times)))]
    # Drop 03:00 for (spring) dst-transition. October data is unaffected because
    # the format is 3A:00/3B:00
    data = data[~((data['raw_hour'] == '03:00:00') & (
                data.index.isin(dst_transition_times)))]

    data.index = data.index.tz_localize('Europe/Berlin', ambiguous='infer')
    data.drop(['Day', 'hour', 'raw_hour'], axis=1, inplace=True)
    data.rename(columns=lambda country: 'load_' + country, inplace=True)
    data = data.replace(to_replace='n.a.', value=np.nan)

    return data

## 2.1 '50Hertz

In [49]:
def read_hertz(filepath, tech):
    """Read a 50Hertz CSV file and return it with a localized time index.

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.
    tech : str
        Technology name; used as prefix of the value column name.

    Returns
    -------
    pandas.DataFrame
        Single column ``<tech>_DE50hertz_actual`` indexed by 'dt_index',
        localized to 'Europe/Berlin'.
    """
    data = pd.read_csv(
        filepath,
        sep=';',
        header=3,
        names=[
            'date',
            'time',
            tech + '_DE50hertz_actual'
            ],
        # Read the date as plain text so the '.' thousands separator cannot
        # affect it.
        dtype={'date': str},
        decimal=',',
        thousands='.',
        # truncate values in 'time' column after 5th character
        converters={'time': lambda x: x[:5]},
        usecols=[0, 1, 3],
    )

    # Build the timestamp explicitly from the two text columns.  This replaces
    # the nested ``parse_dates={...}`` / ``date_parser=None`` arguments, which
    # are deprecated or rejected by current pandas versions.
    data.index = pd.DatetimeIndex(
        pd.to_datetime(data['date'] + ' ' + data['time'], dayfirst=True),
        name='dt_index',
    )
    data = data.drop(['date', 'time'], axis=1)

    # Until 2006 as well as in 2015, during the fall dst-transition, only the
    # wintertime hour (marked by a B in the data) is reported, the summertime
    # hour, (marked by an A) is missing in the data.
    # An array consisting only of "False" entries tells pandas to treat the
    # hour from 2:00 to 2:59 as wintertime.
    # A file without data rows has no first timestamp to inspect, so it simply
    # goes through the 'infer' branch.
    if len(data.index) == 0 or data.index[0].year in range(2007, 2015):
        ambiguous = 'infer'
    else:
        ambiguous = np.zeros(len(data.index), dtype=bool)
    data.index = data.index.tz_localize('Europe/Berlin', ambiguous=ambiguous)

    return data

## 2.2 Amprion

In [59]:
def read_amprion(filepath, tech):
    """Read an Amprion CSV file and return it with a localized time index.

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.
    tech : str
        Technology name; used as prefix of the value column names.

    Returns
    -------
    pandas.DataFrame
        Columns ``<tech>_DEamprion_forecast`` and ``<tech>_DEamprion_actual``
        indexed by 'dt_index', localized to 'Europe/Berlin'.
    """
    data = pd.read_csv(
        filepath,
        sep=';',
        header=0,
        names=[
            'date',
            'time',
            tech + '_DEamprion_forecast',
            tech + '_DEamprion_actual'
            ],
        # Keep the date as plain text; it is parsed explicitly below.
        dtype={'date': str},
        thousands=None,
        # Truncate values in 'time' column after 5th character.
        converters={'time': lambda x: x[:5]},
        usecols=[0, 1, 2, 3],
    )

    # Build the timestamp explicitly from the two text columns.  This replaces
    # the nested ``parse_dates={...}`` / ``date_parser=None`` arguments, which
    # are deprecated or rejected by current pandas versions.
    stamps = pd.DatetimeIndex(
        pd.to_datetime(data['date'] + ' ' + data['time'], dayfirst=True),
        name='dt_index',
    )
    data = data.drop(['date', 'time'], axis=1)

    until_2009 = stamps[stamps.year <= 2009]
    until_2009 = until_2009.tz_localize('Europe/Berlin', ambiguous='infer')

    # In the years after 2009, during the fall dst-transition, only the
    # summertime hour is reported, the wintertime hour is missing in the data.
    # An array consisting only of "True" entries tells pandas to treat the
    # hour from 2:00 to 2:59 as summertime.
    after_2009 = stamps[stamps.year > 2009]
    summertime = np.ones(len(after_2009), dtype=bool)
    after_2009 = after_2009.tz_localize('Europe/Berlin', ambiguous=summertime)

    # The two parts are simply concatenated, so rows up to 2009 are assumed to
    # come before all later rows in the file.
    data.index = until_2009.append(after_2009)

    return data

## 2.3 TenneT

The Tennet Data doesn't feature a time column. Instead, the quarter-hourly data entries for each day are numbered by their position, creating an index ranging...
* from 1 to 96 on normal days,
* from 1 to 92 on spring dst-transition dates,
* from 1 to 100 on fall dst-transition days.

This index can be used to compute a timestamp. However, there are a couple of errors in the data, which is why a lot of exceptions need to be specified.

In [52]:
def read_tennet(filepath, tech):
    """Read a TenneT CSV file and return it with a localized time index.

    The file has no time column.  Each row carries the position of its
    quarter-hour within the day, from which the timestamp is computed after
    the dst-transition days and a few known data errors have been repaired.

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.
    tech : str
        Technology name; used as prefix of the value column names.  For
        'pv' the offshore-share column is dropped.

    Returns
    -------
    pandas.DataFrame
        Columns ``<tech>_DEtennet_forecast``, ``<tech>_DEtennet_actual`` and,
        unless tech is 'pv', ``<tech>_DEtennet_offshore_share``, indexed by
        'dt_index', localized to 'Europe/Berlin'.

    Raises
    ------
    ValueError
        If a quarter-hour position outside 1..96 is left after the repairs.
    """
    # Same logger object as the one created with getLogger('log') at the top
    # of the notebook; looked up here so the function is self-contained.
    log = logging.getLogger('log')

    data = pd.read_csv(
        filepath,
        sep=';',
        encoding='latin_1',
        header=3,
        index_col=None,
        names=[
            'date',
            'position',
            tech + '_DEtennet_forecast',
            tech + '_DEtennet_actual',
            tech + '_DEtennet_offshore_share'
            ],
        parse_dates=False,
        thousands=None,
        usecols=[0, 1, 2, 3, 4],
    )

    # Carry the date forward over the following rows that have no date entry
    # (at most 100 in a row).
    data['date'] = data['date'].ffill(limit=100)

    for i in range(len(data.index)):
        # On the day in March when summertime begins, shift the data forward by
        # 1 hour, beginning with the 9th quarter-hour, so the index runs again
        # up to 96
        if (data['position'][i] == 92 and (
                (i == len(data.index)-1) or
                (data['position'][i + 1] == 1)
                )
           ):
            slicer = data[
                (data['date'] == data['date'][i]) &
                (data['position'] >= 9)
                ].index
            data.loc[slicer, 'position'] = data['position'] + 4

        if data['position'][i] > 96: # True when summertime ends in October
            # .loc replaces the former .ix indexer, which no longer exists in
            # current pandas versions.
            log.info('%s th quarter-hour at %s, position %s',data[
                    'position'][i], data.loc[i, 'date'], (i))

            # Instead of having the quarter-hours' index run up to 100, we want
            # to have it set back by 1 hour beginning from the 13th
            # quarter-hour, ending at 96
            if (data['position'][i] == 100 and not
                    (data['position'] == 101).any()):
                slicer = data[
                    (data['date'] == data['date'][i]) &
                    (data['position'] >= 13)
                    ].index
                data.loc[slicer, 'position'] = data['position'] - 4

            # In 2011 and 2012, there are 101 quarter hours on the day the
            # summertime ends, so 1 too many.  From looking at the data, we
            # inferred that the 13th quarter hour is the culprit, so we drop
            # that.  The following entries for that day need to be shifted.
            elif data['position'][i] == 101:
                data = data[~(
                    (data['date'] == data['date'][i]) &
                    (data['position'] == 13)
                    )]
                slicer = data[
                    (data['date'] == data['date'][i]) &
                    (data['position'] >= 13)
                    ].index
                data.loc[slicer, 'position'] = data['position'] - 5

    # On 2012-03-25, there are 94 entries, where entries 8 and 10 are probably
    # wrong.
    if data['date'][0] == '2012-03-01':
        data = data[~(
            (data['date'] == '2012-03-25') & (
                (data['position'] == 8) |
                (data['position'] == 10)
                )
            )]
        slicer = data[
            (data['date'] == '2012-03-25') &
            (data['position'] >= 9)
            ].index
        data.loc[slicer, 'position'] = [8] + list(range(13, 97))

    # On 2012-09-27, there are 97 entries.  Probably, just the 97th entry is wrong.
    if data['date'][0] == '2012-09-01':
        data = data[~(
            (data['date'] == '2012-09-27') &
            (data['position'] == 97)
            )]

    # Any position still outside 1..96 would end up on a neighbouring day, so
    # fail loudly instead.
    if ((data['position'] < 1) | (data['position'] > 96)).any():
        raise ValueError(
            'quarter-hour position outside 1..96 in %s' % filepath)

    # Here we compute the timestamp from the position and generate the
    # datetime-index: every position stands for 15 minutes since midnight.
    # The dates are written as YYYY-MM-DD (see the literal comparisons above),
    # so they are parsed with an explicit format instead of dayfirst=True,
    # which newer pandas versions can apply to such strings and thereby swap
    # day and month.
    data['dt_index'] = (
        pd.to_datetime(data['date'], format='%Y-%m-%d') +
        pd.to_timedelta((data['position'] - 1) * 15, unit='m')
        )
    data = data.set_index('dt_index')

    # In the years 2006, 2008, and 2009, the dst-transition hour in March
    # appears as empty rows in the data.  We delete it from the set in order to
    # make the timezone localization work.
    for crucial_date in pd.to_datetime([
            '2006-03-26',
            '2008-03-30',
            '2009-03-29'
            ]).date:
        if data.index[0].year == crucial_date.year:
            data = data[~(
                (data.index.date == crucial_date) &
                (data.index.hour == 2)
                )]

    data.index = data.index.tz_localize('Europe/Berlin', ambiguous='infer')

    data = data.drop(['position', 'date'], axis=1)
    if tech == 'pv':
        data = data.drop('pv_DEtennet_offshore_share', axis=1)

    return data

## 2.4 TransnetBW

In [62]:
def read_transnetbw(filepath, tech):
    """Read a TransnetBW CSV file and return it with a localized time index.

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.
    tech : str
        Technology name; used as part of the value column names.

    Returns
    -------
    pandas.DataFrame
        Columns ``TransnetBW_<tech>_forecast`` and ``TransnetBW_<tech>_actual``
        indexed by 'dt_index', localized to 'Europe/Berlin'.  The values are
        shifted up by one row, so the last row holds no data.
    """
    # The column prefix used to be read from a global variable named
    # ``source`` that only exists while the main loop is running.  Inside that
    # loop it is 'TransnetBW' whenever this reader is called, so the value is
    # fixed here.
    source = 'TransnetBW'

    data = pd.read_csv(
        filepath,
        sep=';',
        header=0,
        names=[
            'date',
            'time',
            source + '_' + tech + '_forecast',
            source + '_' + tech + '_actual'
            ],
        # Keep date and time as plain text; they are parsed explicitly below.
        dtype={'date': str, 'time': str},
        decimal=',',
        thousands=None,
        usecols=[2, 3, 4, 5],
    )

    # Build the timestamp explicitly from the two text columns.  This replaces
    # the nested ``parse_dates={...}`` / ``date_parser=None`` arguments, which
    # are deprecated or rejected by current pandas versions.
    data.index = pd.DatetimeIndex(
        pd.to_datetime(data['date'] + ' ' + data['time'], dayfirst=True),
        name='dt_index',
    )
    data = data.drop(['date', 'time'], axis=1)

    # the time taken from column 3 indicates the end of the respective period.
    # to construct the index, however, we need the beginning, so we shift the
    # data by 1 period.
    data = data.shift(periods=-1, axis='index')
    # 'ambiguous' refers to how the October dst-transition hour is handled.
    # 'infer' will attempt to infer dst-transition hours based on order.
    data.index = data.index.tz_localize('Europe/Berlin', ambiguous='infer')

    return data

# 3. Reading files one by one

## 3.1 Create an empty DataFrame

Create an empty DataFrame / reset the DataFrame

In [54]:
# Root folder of the downloaded files; the loop in section 3.2 looks for
# <downloadpath>/<source>/<tech>/<subdir>/<file> below it.
downloadpath = 'downloads2'

In [64]:
# Start from an empty table; re-running this cell resets the collected data.
data_set = pd.DataFrame()

## 3.2 Apply the processing function one-by-one

For each source/TSO and technology specified in the conf dict, this section finds all the downloaded files in the downloads folder and then calls the matching `read_*` function (e.g. `read_tennet` for TenneT).
The datasets returned by the read function are then merged into one large dataset.

In [65]:
# Reader function for every known source.  Each entry is called as
# reader(filepath, tech).
readers = {
    'ENTSO-E': read_entso,
    # read_svenskakraftnaet is not defined in the cells above and takes the
    # source as an extra argument; the lambda adapts the call and defers the
    # name lookup until the reader is actually needed.
    'Svenska_Kraftnaet': lambda filepath, tech: read_svenskakraftnaet(
        filepath, 'Svenska_Kraftnaet', tech),
    '50Hertz': read_hertz,
    'Amprion': read_amprion,
    'TenneT': read_tennet,
    'TransnetBW': read_transnetbw,
}

for source, techlist in conf.items():
    reader = readers.get(source)
    if reader is None:
        # Without this check, the data of the previously read file would be
        # merged a second time (or a NameError raised on the very first file).
        logger.warning('no reader defined for source %s, skipping', source)
        continue
    for tech in techlist:
        tech_dir = os.path.join(downloadpath, source, tech)
        # Sorted so that files are merged in the same order on every run.
        for subdir in sorted(os.listdir(tech_dir)):
            files = os.listdir(os.path.join(tech_dir, subdir))
            if len(files) != 1:
                # Only folders holding exactly one file are processed.
                logger.warning('expected exactly 1 file in %s %s %s, found %s',
                               source, tech, subdir, len(files))
                continue
            logger.info('reading %s %s %s', source, tech, files[0])
            filepath = os.path.join(tech_dir, subdir, files[0])
            if os.path.getsize(filepath) < 128:
                logger.info('file is smaller than 128 Byte, ' +
                        'which means it is probably empty')
                continue
            data_to_add = reader(filepath, tech)
            # Values already present in data_set take precedence; the new
            # file only fills gaps and adds new rows/columns.
            data_set = data_set.combine_first(data_to_add)

INFO:log:reading TenneT wind Export_Januar-2006.csv
INFO:log:reading TenneT wind Export_Februar-2006.csv
INFO:log:reading TenneT wind Export_Maerz-2006.csv
INFO:log:reading TenneT wind Export_April-2006.csv
INFO:log:reading TenneT wind Export_Mai-2006.csv
INFO:log:reading TenneT wind Export_Juni-2006.csv
INFO:log:reading TenneT wind Export_Juli-2006.csv
INFO:log:reading TenneT wind Export_August-2006.csv
INFO:log:reading TenneT wind Export_September-2006.csv
INFO:log:reading TenneT wind Export_Oktober-2006.csv
INFO:log:97 th quarter-hour at 2006-10-29, position 2784
INFO:log:98 th quarter-hour at 2006-10-29, position 2785
INFO:log:99 th quarter-hour at 2006-10-29, position 2786
INFO:log:100 th quarter-hour at 2006-10-29, position 2787
INFO:log:reading TenneT wind Export_November-2006.csv
INFO:log:reading TenneT wind Export_Dezember-2006.csv
INFO:log:reading TenneT wind Export_Januar-2007.csv
INFO:log:reading TenneT wind Export_Februar-2007.csv
INFO:log:reading TenneT wind Export_Maerz-



INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading ENTSO-E Data_Portal Statistics.xls




INFO:log:reading 50Hertz wind 2005.csv
INFO:log:reading 50Hertz wind 2006.csv
INFO:log:reading 50Hertz wind 2007.csv
INFO:log:reading 50Hertz wind 2008.csv
INFO:log:reading 50Hertz wind 2009.csv
INFO:log:reading 50Hertz wind 2010.csv
INFO:log:reading 50Hertz wind 2011.csv
INFO:log:reading 50Hertz wind 2012.csv
INFO:log:reading 50Hertz wind 2013.csv
INFO:log:reading 50Hertz wind 2014.csv
INFO:log:reading 50Hertz wind 2015.csv
INFO:log:reading 50Hertz pv 2012.csv
INFO:log:reading 50Hertz pv 2013.csv
INFO:log:reading 50Hertz pv 2014.csv
INFO:log:reading 50Hertz pv 2015.csv





In [66]:
# Show the first rows of the merged table as a quick check.
data_set.head()

Unnamed: 0_level_0,TransnetBW_pv_actual,TransnetBW_pv_forecast,TransnetBW_wind_actual,TransnetBW_wind_forecast,load_AT,load_BA,load_BE,load_BG,load_CH,load_CS,...,pv_DEamprion_actual,pv_DEamprion_forecast,pv_DEtennet_actual,pv_DEtennet_forecast,wind_DE50hertz_actual,wind_DEamprion_actual,wind_DEamprion_forecast,wind_DEtennet_actual,wind_DEtennet_forecast,wind_DEtennet_offshore_share
dt_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-01-01 00:00:00+01:00,,,,,,,,,,,...,,,,,1243,,,,,
2005-01-01 00:15:00+01:00,,,,,,,,,,,...,,,,,1243,,,,,
2005-01-01 00:30:00+01:00,,,,,,,,,,,...,,,,,1243,,,,,
2005-01-01 00:45:00+01:00,,,,,,,,,,,...,,,,,1243,,,,,
2005-01-01 01:00:00+01:00,,,,,,,,,,,...,,,,,1270,,,,,


In [67]:
# Write the merged table to 'raw_data.csv': comma-separated, floats with two
# decimals, timestamps formatted with their UTC offset (%z).
data_set.to_csv('raw_data.csv', sep=',', float_format='%.2f', decimal='.', date_format='%Y-%m-%dT%H:%M:%S%z')