In [2]:
import urllib
import zlib
import os
import pandas as pd
import numpy as np
from datetime import date, datetime, timedelta

In [3]:
pd.set_option("display.max_columns", None)

In [4]:
import gzip

One year of monthly weather data was downloaded in the folder "ObservedData" for 2003 
from Meteo France Archives of in situ observations.

### Decompressing and filtering for Nantes=7222 .gz downloaded grided weatherfiles

In [80]:
cwd = os.path.abspath(r'D:\Weather_files\ObservedData2003') 
files = os.listdir(cwd)

In [81]:
df = pd.DataFrame()
for file in files:
    if file.endswith('.gz'):
        f=gzip.open('D:/Weather_files/ObservedData2003/'+str(file), 'rb')
        df_W=pd.read_csv(f,delimiter=';')
        df_output = df_W[df_W.numer_sta == 7222] # weather station code for Nantes
        df_output.loc[:,"date"] = pd.to_datetime(df_output["date"], format = "%Y%m%d%H%M%S")
        df_output.index = df_output["date"]
        df_output=df_output.drop(["date", "numer_sta"], axis = 1)
        df_output.to_csv('D:/Weather_files/ObservedData2003/'+str(file)+".csv")
        df = df.append(df_output)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


### Transforming three hour weather data to hourly and creating an epw weather file from the given data

In [None]:
#'date'       :Datetime 
#'numer_sta'  :number of weather station (Nantes = 7222)
#'pmer'     :PRESSURE AT SEA LEVEL (Pression au niveau mer)[pa]
#'tend '    :presssure variations in every 3 hours(Variation de pression en 3 heures)[pa]
#'cod_tend ':Type de tendance barométrique [code (0200)]
#'dd'       :WIND DIRECTION (Direction du vent moyen 10 mn [degrees])
#'ff'       :WIND SPEED (Vitesse du vent moyen 10 mn [m/s])
#'t'        :DRY BULB TEMPERATURE (Température [K])
#'td'       :DEW POINT TEMPERATURE (Point de rosée [K])
#'u',        :REATIVE HUMIDITY (Humidité [%])
#'vv',       :HORIZONTAL VISBILITY (Visibilité horizontale [m])
#'ww',       :Temps présent [code (4677)]
#'w1',       :Temps passé 1 [code (4561)]
#'w2',       :Temps passé 2 [code (4561)]
#'n',        :TOTAL CLOUD COVER (Nebulosité totale [%])
#'nbas',     :OPAQUE SKY COVER measure (1/10) (Nébulosité des nuages de l'étage inférieur [octa] measured (1/8))
#'hbas',     :Hauteur de la base des nuages de l'étage inférieur [m]
#'cl',       :Type des nuages de l'étage inférieur
#'cm',       :Type des nuages de l'étage moyen
#'ch',       :Type des nuages de l'étage supérieur
#'pres',     :ATMOSPHERIC STATION PRESSURE (Pression station [pa])
#'niv_bar',  :Niveau barométrique [pa]
#'geop',     :Géopotentiel [m2/s2]
#'tend24',   :Variation de pression en 24 heures [pa]
#'tn12',     :Température minimale sur 12 heures [K]
#'tn24',     :Température minimale sur 24 heures [K]
#'tx12',     :Température maximale sur 12 heures [K]
#'tx24',     :Température maximale sur 24 heures [K]
#'tminsol',  :Température minimale du sol sur 12 heures [K]
#'sw',       :Méthode mesure tw 
#'tw',       :Wet thermometer temperature (Température du thermomètre mouillé [K])
#'raf10',    :Rafales sur les 10 dernières minutes [m/s]
#'rafper',   :Rafales sur une période [m/s]
#'per',      :Période de mesure de la rafale [min]
#'etat_sol', :Etat du sol
#'ht_neige', :Hauteur totale de la couche de neige, glace, autre au sol [m]
#'ssfrai',   :Hauteur de la neige fraîche [m]
#'perssfrai',:Periode de mesure de la neige fraiche [1/10 hour]
#'rr1',      :LIQUID PRECIPITATION DEPTH (Précipitations dans les 1 dernières heures [mm])
#'rr3',      : =
#'rr6',      : =
#'rr12',     : =
#'rr24',     : =
#'phenspe1', :Phénomène spécial 1 [code (3778)]
#'phenspe2', : =
#'phenspe3', : =
#'phenspe4', : =
#'nnuage1',  :Nébulosité cche nuageuse N [octa] 
#'ctype1',   :Type nuage N [code (0500)]
#'hnuage1',  :Hauteur de base N [m]
#'nnuage2',  : =
#'ctype2',   : =
#'hnuage2',  : =
#'nnuage3',  : =
#'ctype3',   : =
#'hnuage3',  : =
#'nnuage4',  : =
#'ctype4',   : =
#'hnuage4',  : =
#'Unnamed: 59':

In [83]:
## replace mq to NaN 
df = df.replace('mq', np.nan, regex=True)

In [84]:
df=df.resample('H').last()# resampling doesn't go past the last index. 
                          #Parameters of weather file from MeteoFrance archives

In [85]:
df['t'] = df.t.astype(float)
df['td'] = df.td.astype(float)
df['u'] = df.u.astype(float)
df['pres'] = df.pres.astype(float)
df['n'] = df.n.astype(float)
df['dd'] = df.dd.astype(float)
df['ff'] = df.ff.astype(float)

In [86]:
##Interpolation of 3hr data to hourly weather data 
#dry bulb temperature 
df['t'].interpolate(method='polynomial', order=7, inplace=True)
# dew point temperature
df['td'].interpolate(method='polynomial', order=7, inplace=True)
# relative humidity 
df['u'].interpolate(method='linear', limit_direction='both',limit=1, inplace=True)
# Atmospheric pressure 
df['pres'].interpolate(method='linear', limit_direction='both',limit=1, inplace=True)
# Cloud cover /total /opaque SKY cover 
df['n'].interpolate(method='linear', limit_direction='both',limit=1, inplace=True)
## Wind direction 
df['dd'].interpolate(method='linear', limit_direction='both',limit=1, inplace=True)
## Wind speed 
df['ff'].interpolate(method='linear', limit_direction='both',limit=1, inplace=True)

In [87]:
## year, month, day, hour to dataframe from index 
df['Year']=df.index.year
df['Month']=df.index.month
df['Day']=df.index.day
df['Hour']=df.index.hour

In [88]:
# TOtal cloud cover is measured in 1/10
df['TotalCloudCover']=round(df['n']/10,0)
# Opaque cloud cover is measured in 1/9
df['OpaqueCloudCover']=round(df['n']/11.2,0)

In [1]:
#df.head()

In [90]:
df.columns

Index(['pmer', 'tend', 'cod_tend', 'dd', 'ff', 't', 'td', 'u', 'vv', 'ww',
       'w1', 'w2', 'n', 'nbas', 'hbas', 'cl', 'cm', 'ch', 'pres', 'niv_bar',
       'geop', 'tend24', 'tn12', 'tn24', 'tx12', 'tx24', 'tminsol', 'sw', 'tw',
       'raf10', 'rafper', 'per', 'etat_sol', 'ht_neige', 'ssfrai', 'perssfrai',
       'rr1', 'rr3', 'rr6', 'rr12', 'rr24', 'phenspe1', 'phenspe2', 'phenspe3',
       'phenspe4', 'nnuage1', 'ctype1', 'hnuage1', 'nnuage2', 'ctype2',
       'hnuage2', 'nnuage3', 'ctype3', 'hnuage3', 'nnuage4', 'ctype4',
       'hnuage4', 'Unnamed: 59', 'Year', 'Month', 'Day', 'Hour',
       'TotalCloudCover', 'OpaqueCloudCover'],
      dtype='object')

In [91]:
##Rearranging columns and selecting only a few that we use in modification of weather file
Columns=['Year', 'Month', 'Day', 'Hour','dd', 'ff', 't', 'td','u','n', 'pres','TotalCloudCover', 'OpaqueCloudCover']
df=df[Columns]
#  year is not a leap year, if you dealing with a leap year, in order to make it fit with typical
#  weather file, the leap day needs to be removed. (typical weather years are not leap years)
#  Saving data to a CSV and adding TWO LAST HOURS of the year manually 
#  because the limit value in interpolation was 1. # users could increase this value to 3 to also account for the last two hours
df.to_csv('D:/Weather_files/ObservedData2003/2003_Nantes.csv',index=True)

### Writing data from CSV to EPW weather file for 2003

In [6]:
df2=pd.read_csv('D:/Weather_files/ObservedData2003/2003_Nantes.csv')

In [7]:
## missing data are filled with automatic closest neighbor 
df2=df2.fillna(method="ffill")

In [8]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8760 entries, 0 to 8759
Data columns (total 14 columns):
date                8760 non-null object
Year                8760 non-null int64
Month               8760 non-null int64
Day                 8760 non-null int64
Hour                8760 non-null int64
dd                  8760 non-null float64
ff                  8760 non-null float64
t                   8760 non-null float64
td                  8760 non-null float64
u                   8760 non-null float64
n                   8760 non-null float64
pres                8760 non-null float64
TotalCloudCover     8760 non-null float64
OpaqueCloudCover    8760 non-null float64
dtypes: float64(9), int64(4), object(1)
memory usage: 958.2+ KB


In [9]:
df2['TotalCloudCover'] = df2.TotalCloudCover.astype(int)
df2['OpaqueCloudCover']=df2.OpaqueCloudCover.astype(int)
df2['WindDirection']=df2['dd'].astype(int)
df2['DryBulbTemp']=df2['t']-273.15
df2['DewPointTemp']=df2['td']-273.15

In [12]:
## Writing the weather data on epw weather template
from ladybug.epw import EPW
from pandas import DataFrame
from collections import OrderedDict
%config Completer.use_jedi = False

In [13]:
# Checking epw weather file data 
epwFile=EPW(r'D:\Weather_files\ObservedData2003\FRA_Nantes_072220_IWEC.epw')
epwDataList=epwFile.to_dict()['data_collections']
epwDataDict = OrderedDict()

for dataColumns in epwDataList:
    dataName=dataColumns['header']['data_type']['name']
    epwDataDict[dataName]=dataColumns['values']

epwDataFrame = DataFrame(epwDataDict)
epwDataFrame.head()

Unnamed: 0,Year,Month,Day,Hour,Minute,Uncertainty Flags,Dry Bulb Temperature,Dew Point Temperature,Relative Humidity,Atmospheric Station Pressure,Extraterrestrial Horizontal Radiation,Extraterrestrial Direct Normal Radiation,Horizontal Infrared Radiation Intensity,Global Horizontal Radiation,Direct Normal Radiation,Diffuse Horizontal Radiation,Global Horizontal Illuminance,Direct Normal Illuminance,Diffuse Horizontal Illuminance,Zenith Luminance,Wind Direction,Wind Speed,Total Sky Cover,Opaque Sky Cover,Visibility,Ceiling Height,Present Weather Observation,Present Weather Codes,Precipitable Water,Aerosol Optical Depth,Snow Depth,Days Since Last Snowfall,Albedo,Liquid Precipitation Depth,Liquid Precipitation Quantity
0,2005,12,31,24,60,*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?,5.7,5.5,98,100879,0,0,312,0,0,0,0,0,0,9999,223,1.3,10,9,9999.0,99999,9,999999999,14,0.103,0,5,0.2,1.7,99.0
1,2005,1,1,1,60,*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?,11.0,9.2,88,100879,0,0,332,0,0,0,0,0,0,9999,218,2.4,10,9,9999.0,99999,9,999999999,18,0.092,0,4,0.2,1.5,99.0
2,2005,1,1,2,60,*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?,11.1,8.0,81,100879,0,0,327,0,0,0,0,0,0,9999,252,1.9,10,9,9999.0,99999,9,999999999,16,0.092,0,4,0.2,0.0,99.0
3,2005,1,1,3,60,*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?,10.9,7.5,80,100879,0,0,325,0,0,0,0,0,0,9999,249,1.6,10,9,9999.0,99999,9,999999999,16,0.092,0,4,0.2,0.0,99.0
4,2005,1,1,4,60,*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?,10.8,7.4,79,100879,0,0,324,0,0,0,0,0,0,9999,310,1.9,10,9,9999.0,99999,9,999999999,16,0.092,0,4,0.2,0.0,99.0


In [14]:
## ratio of opaque sky cover 
print(epwDataFrame['Total Sky Cover'].unique())
epwDataFrame['Opaque Sky Cover'].unique()

[10  9  8  6  5  4  3  1  0]


array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0], dtype=int64)

In [15]:
# Name and indices of columns in a typical epw weather file
#field_number: index value between 0 to 34 for different available epw fields.
#            0 Year
#            1 Month
#            2 Day
#            3 Hour
#            4 Minute

#            6 Dry Bulb Temperature
#            7 Dew Point Temperature
#            8 Relative Humidity
#            9 Atmospheric Station Pressure
#            10 Extraterrestrial Horizontal Radiation
#            11 Extraterrestrial Direct Normal Radiation
#            12 Horizontal Infrared Radiation Intensity
#            13 Global Horizontal Radiation
#            14 Direct Normal Radiation
#            15 Diffuse Horizontal Radiation
#            16 Global Horizontal Illuminance
#            17 Direct Normal Illuminance
#            18 Diffuse Horizontal Illuminance
#            19 Zenith Luminance
#            20 Wind Direction
#            21 Wind Speed
#            22 Total Sky Cover
#            23 Opaque Sky Cover
#            24 Visibility
#            25 Ceiling Height
#            26 Present Weather Observation
#            27 Present Weather Codes
#            28 Precipitable Water
#            29 Aerosol Optical Depth
#            30 Snow Depth
#            31 Days Since Last Snowfall
#            32 Albedo
#            33 Liquid Precipitation Depth
#            34 Liquid Precipitation Quantity

In [16]:
def changeEPWData(oldEpwFilePath,newEpwFilePath,dataIndex,dataList):
    with open(oldEpwFilePath) as oldStream,open(newEpwFilePath,"w") as newStream:
        numCount=0
        for idx,lines in enumerate(oldStream):
            if lines.strip():
                try:
                    lineSplit=lines.strip().split(",")
                    dataTest=float(lineSplit[0])
                    lineSplit[dataIndex]=str(dataList[numCount])
                    data=",".join(lineSplit)
                    newStream.write(data+"\n")
                    numCount+=1
                except ValueError:
                    newStream.write(lines.strip()+"\n")
            else:
                newStream.write(lines)
    return newEpwFilePath

In [None]:
#Writing column by column of weather file to make sure the structure of EPW file doesn't change
#a user can decide to modify one climate variable or multiples climate variables

In [None]:
# In this script a temporary folder was created to save modified versions of EPW weather file.
# With every new climate variable, previously modified file from temporary folder is selected and
# further overwritten until all climate variables are overwritten. 

In [17]:
### Replacing year
changeEPWData(r'D:\Weather_files\ObservedData\FRA_Nantes_072220_IWEC.epw',
              r'D:\Weather_files\ObservedData\Temporary\year.epw',
              dataIndex=0,
              dataList=df2.Year)

In [18]:
###Writing column by column of weather file to make sure the structure of it doesn't change
### Replacing month
#changeEPWData(r'D:\Weather_files\ObservedData\Temporary\year.epw',
#              r'D:\Weather_files\ObservedData\Temporary\month.epw',
#              dataIndex=1,
#              dataList=df2.Month)

In [19]:
###Writing column by column of weather file to make sure the structure of it doesn't change
### Replacing Day
#changeEPWData(r'D:\Weather_files\ObservedData\Temporary\month.epw',
#              r'D:\Weather_files\ObservedData\Temporary\day.epw',
#              dataIndex=2,
#              dataList=df2.Day)

In [20]:
###Writing column by column of weather file to make sure the structure of it doesn't change
### Replacing hour
#changeEPWData(r'D:\Weather_files\ObservedData\Temporary\day.epw',
#              r'D:\Weather_files\ObservedData\Temporary\hour.epw',
#              dataIndex=3,
#              dataList=df2.Hour)

In [21]:
##Writing column by column of weather file to make sure the structure of it doesn't change
## Replacing dry bulb temperature
changeEPWData(r'D:\Weather_files\ObservedData2003\year.epw',
              r'D:\Weather_files\ObservedData2003\Temporary\DryBTemp.epw',
              dataIndex=6,
              dataList=df2.DryBulbTemp)

'D:\\Weather_files\\ObservedData\\Temporary\\DryBTemp.epw'

In [22]:
##Writing column by column of weather file to make sure the structure of it doesn't change
## Replacing dew point temperature
changeEPWData(r'D:\Weather_files\ObservedData2003\Temporary\DryBTemp.epw',
              r'D:\Weather_files\ObservedData2003\Temporary\DewPTemp.epw',
              dataIndex=7,
              dataList=df2.DewPointTemp)

'D:\\Weather_files\\ObservedData\\Temporary\\DewPTemp.epw'

In [23]:
##Writing column by column of weather file to make sure the structure of it doesn't change
## Replacing relative humidity
changeEPWData(r'D:\Weather_files\ObservedData2003\Temporary\DewPTemp.epw',
              r'D:\Weather_files\ObservedData2003\Temporary\RH.epw',
              dataIndex=8,
              dataList=df2.u)

'D:\\Weather_files\\ObservedData\\Temporary\\RH.epw'

In [24]:
##Writing column by column of weather file to make sure the structure of it doesn't change
## Replacing atmospheric pressure of station
changeEPWData(r'D:\Weather_files\ObservedData2003\Temporary\RH.epw',
              r'D:\Weather_files\ObservedData2003\Temporary\AtmPressure.epw',
              dataIndex=9,
              dataList=df2.pres)

'D:\\Weather_files\\ObservedData\\Temporary\\AtmPressure.epw'

In [25]:
##Writing column by column of weather file to make sure the structure of it doesn't change
## Replacing wind direction
changeEPWData(r'D:\Weather_files\ObservedData2003\Temporary\AtmPressure.epw',
              r'D:\Weather_files\ObservedData2003\Temporary\WindDirection.epw',
              dataIndex=20,
              dataList=df2.WindDirection)

'D:\\Weather_files\\ObservedData\\Temporary\\WindDirection.epw'

In [26]:
##Writing column by column of weather file to make sure the structure of it doesn't change
## Replacing wind speed
changeEPWData(r'D:\Weather_files\ObservedData2003\Temporary\WindDirection.epw',
              r'D:\Weather_files\Observed_Nantes_2003.epw',
              dataIndex=21,
              dataList=df2.ff)

'D:\\Weather_files\\Observed_Nantes_2003.epw.epw'

In [27]:
###Writing column by column of weather file to make sure the structure of it doesn't change
### Replacing Total sky cover
#changeEPWData(r'D:\Weather_files\ObservedData\Temporary\WindSpeed.epw',
#              r'D:\Weather_files\ObservedData\Temporary\TotalSky.epw',
#              dataIndex=22,
#              dataList=df2.TotalCloudCover)

In [28]:
###Writing column by column of weather file to make sure the structure of it doesn't change
### Replacing Opaque sky cover
#changeEPWData(r'D:\Weather_files\ObservedData\Temporary\TotalSky.epw',
#              r'D:\Weather_files\Observed_Nantes_2003.epw',
#              dataIndex=23,
#              dataList=df2.OpaqueCloudCover)

#### Verifying how it looks

In [29]:
# Checking how it looks now
epwFile=EPW(r'D:\Weather_files\Observed_Nantes_2003.epw')
epwDataList=epwFile.to_dict()['data_collections']
epwDataDict = OrderedDict()

for dataColumns in epwDataList:
    dataName=dataColumns['header']['data_type']['name']
    epwDataDict[dataName]=dataColumns['values']

epwDataFrame1 = DataFrame(epwDataDict)
epwDataFrame1.head()

Unnamed: 0,Year,Month,Day,Hour,Minute,Uncertainty Flags,Dry Bulb Temperature,Dew Point Temperature,Relative Humidity,Atmospheric Station Pressure,Extraterrestrial Horizontal Radiation,Extraterrestrial Direct Normal Radiation,Horizontal Infrared Radiation Intensity,Global Horizontal Radiation,Direct Normal Radiation,Diffuse Horizontal Radiation,Global Horizontal Illuminance,Direct Normal Illuminance,Diffuse Horizontal Illuminance,Zenith Luminance,Wind Direction,Wind Speed,Total Sky Cover,Opaque Sky Cover,Visibility,Ceiling Height,Present Weather Observation,Present Weather Codes,Precipitable Water,Aerosol Optical Depth,Snow Depth,Days Since Last Snowfall,Albedo,Liquid Precipitation Depth,Liquid Precipitation Quantity
0,2003,12,31,23,60,*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?,-6.971197,-3.34356,98,100940,0,0,312,0,0,0,0,0,0,9999,40,2.1,6,5,9999.0,99999,9,999999999,14,0.103,0,5,0.2,1.7,99.0
1,2003,1,1,0,60,*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?,10.6,9.7,94,100870,0,0,332,0,0,0,0,0,0,9999,180,6.2,9,8,9999.0,99999,9,999999999,18,0.092,0,4,0.2,1.5,99.0
2,2003,1,1,1,60,*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?,11.853135,11.103876,94,100800,0,0,327,0,0,0,0,0,0,9999,183,7.033333,9,8,9999.0,99999,9,999999999,16,0.092,0,4,0.2,0.0,99.0
3,2003,1,1,2,60,*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?,11.626363,10.860776,95,100730,0,0,325,0,0,0,0,0,0,9999,186,7.866667,9,8,9999.0,99999,9,999999999,16,0.092,0,4,0.2,0.0,99.0
4,2003,1,1,3,60,*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?*?,11.2,10.4,95,100620,0,0,324,0,0,0,0,0,0,9999,190,8.7,9,8,9999.0,99999,9,999999999,16,0.092,0,4,0.2,0.0,99.0
