# Daily weather data in the Netherlands

- https://www.knmi.nl/nederland-nu/klimatologie/daggegevens
- https://aqicn.org/city/amsterdam/

## Installing packages

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

## Importing data

### Weather

YYYYMMDD = Date (YYYY=year MM=month DD=day) / Date (YYYY=year MM=month DD=day)
DDVEC = Vector mean wind direction in degrees (360=north, 90=east, 180=south, 270=west, 0=cale/variable). See http://www.knmi.nl/kennis-en-datacentrum/grond/klimatologische-brochures-en-boeken / Vector mean wind direction in degrees (360=north, 90=east, 180=south, 270=west, 0=calm/variable)
FHVEC = Vector mean wind speed (in 0.1 m/s). See http://www.knmi.nl/kennis-en-datacentrum/grond/klimatologische-brochures-en-boeken / Vector mean windspeed (in 0.1 m/s)
FG = 24-hour mean wind speed (in 0.1 m/s) / Daily mean wind speed (in 0.1 m/s)
FHX = Highest hourly mean wind speed (in 0.1 m/s) / Maximum hourly mean wind speed (in 0.1 m/s)
FHXH = Hour segment in which FHX was measured / Hourly division in which FHX was measured
FHN = Lowest hourly mean wind speed (in 0.1 m/s) / Minimum hourly mean wind speed (in 0.1 m/s)
FHNH = Hour segment in which FHN was measured / Hourly division in which FHN was measured
FXX = Highest wind gust (in 0.1 m/s) / Maximum wind gust (in 0.1 m/s)
FXXH = Hour segment in which FXX was measured / Hourly division in which FXX was measured
TG = 24-hour average temperature (in 0.1 degrees Celsius) / Daily mean temperature in (0.1 degrees Celsius)
TN = Minimum temperature (in 0.1 degrees Celsius) / Minimum temperature (in 0.1 degrees Celsius)
TNH = Hourly division in which TN was measured / Hourly division in which TN was measured
TX = Maximum temperature (in 0.1 degrees Celsius) / Maximum temperature (in 0.1 degrees Celsius)
TXH = Hour segment in which TX was measured / Hourly division in which TX was measured
T10N = Minimum temperature at 10 cm height (in 0.1 degrees Celsius) / Minimum temperature at 10 cm above surface (in 0.1 degrees Celsius)
T10NH = 6-hour period in which T10N was measured / 6-hourly division in which T10N was measured; 6=0-6 UT, 12=6-12 UT, 18=12-18 UT, 24=18-24 UT
SQ = Sunshine duration (in 0.1 hour) calculated from global radiation (-1 for <0.05 hour) / Sunshine duration (in 0.1 hour) calculated from global radiation (-1 for <0.05 hour)
SP = Percentage of longest possible sunshine duration / Percentage of maximum potential sunshine duration
Q = Global radiation (in J/cm2) / Global radiation (in J/cm2)
DR = Duration of precipitation (in 0.1 hour) / Precipitation duration (in 0.1 hour)
RH = Daily precipitation amount (in 0.1 mm) (-1 for <0.05 mm) / Daily precipitation amount (in 0.1 mm) (-1 for <0.05 mm)
RHX = Maximum hourly precipitation amount (in 0.1 mm) (-1 for <0.05 mm) / Maximum hourly precipitation amount (in 0.1 mm) (-1 for <0.05 mm)
RHXH = Hour segment in which RHX was measured / Hourly division in which RHX was measured
PG = 24-hour mean air pressure converted to sea level (in 0.1 hPa) calculated from 24 hourly values ​​/ Daily mean sea level pressure (in 0.1 hPa) calculated from 24 hourly values
PX = Highest hourly value of the air pressure converted to sea level (in 0.1 hPa) / Maximum hourly sea level pressure (in 0.1 hPa)
PXH = Hourly division in which PX was measured
PN = Lowest hourly value of air pressure converted to sea level (in 0.1 hPa) / Minimum hourly sea level pressure (in 0.1 hPa)
PNH = Hourly division in which PN was measured
VVN = Minimum Visibility Occurred / Minimum Visibility; 0: <100m, 1:100-200m, 2:200-300m,..., 49:4900-5000m, 50:5-6km, 56:6-7km, 57:7-8 km,..., 79:29-30 km, 80:30-35 km, 81:35-40 km,..., 89: >70 km)
VVNH = Hour segment in which VVN was measured / Hourly division in which VVN was measured
VVX = Maximum Visibility Occurred / Maximum Visibility; 0: <100m, 1:100-200m, 2:200-300m,..., 49:4900-5000m, 50:5-6km, 56:6-7km, 57:7-8 km,..., 79:29-30 km, 80:30-35 km, 81:35-40 km,..., 89: >70 km)
VVXH = Hour segment in which VVX was measured / Hourly division in which VVX was measured
NG = 24-hour average cloud cover (coverage of the upper air in eighths, 9=superior invisible) / Mean daily cloud cover (in octants, 9=sky invisible)
UG = 24-hour average relative humidity (in percents) / Daily mean relative atmospheric humidity (in percents)
UX = Maximum relative atmospheric humidity (in percents) / Maximum relative atmospheric humidity (in percents)
UXH = Hourly division in which UX was measured
UN = Minimum relative atmospheric humidity (in percents) / Minimum relative atmospheric humidity (in percents)
UNH = Hour segment in which UN was measured / Hourly division in which UN was measured
EV24 = Reference crop evaporation (Makkink) (in 0.1 mm) / Potential evapotranspiration (Makkink) (in 0.1 mm)


In [24]:
weather = pd.read_csv(r'C:\Users\claud\data_final_project\Usefull\etmgeg_240_Ams_Schiphol\etmgeg_240.txt')
weather = weather[(weather.YYYYMMDD >= 20131231)]
weather

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,# STN,YYYYMMDD,DDVEC,FHVEC,FG,FHX,FHXH,FHN,FHNH,FXX,...,VVNH,VVX,VVXH,NG,UG,UX,UXH,UN,UNH,EV24
23010,240,20131231,167,72,73,90,10,50,5,130,...,24,83,18,8,83,95,24,63,18,2
23011,240,20140101,168,78,80,110,21,60,2,170,...,1,75,15,7,89,95,2,78,15,2
23012,240,20140102,191,62,70,100,1,50,16,140,...,10,70,2,5,88,93,10,81,13,3
23013,240,20140103,199,91,97,130,23,70,1,230,...,6,75,24,6,86,95,7,78,20,2
23014,240,20140104,169,64,69,110,1,40,23,150,...,6,74,14,7,86,93,22,79,14,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25884,240,20211113,265,9,25,70,1,10,20,110,...,2,70,11,8,93,97,2,82,13,5
25885,240,20211114,64,32,33,50,10,10,1,80,...,2,67,22,8,91,98,2,81,22,3
25886,240,20211115,63,31,33,40,10,20,23,80,...,19,70,6,8,89,94,15,85,1,1
25887,240,20211116,173,20,24,50,23,10,3,80,...,17,56,2,8,90,97,18,85,2,1


In [36]:
weather.dtypes

# STN        int64
YYYYMMDD     int64
DDVEC        int64
FHVEC        int64
   FG        int64
  FHX        int64
 FHXH        int64
  FHN        int64
 FHNH        int64
  FXX       object
 FXXH       object
   TG        int64
   TN        int64
  TNH       object
   TX        int64
  TXH       object
 T10N       object
T10NH       object
   SQ       object
   SP       object
    Q       object
   DR       object
   RH       object
  RHX       object
 RHXH       object
   PG        int64
   PX       object
  PXH       object
   PN       object
  PNH       object
  VVN       object
 VVNH       object
  VVX       object
 VVXH       object
   NG       object
   UG       object
   UX       object
  UXH       object
   UN       object
  UNH       object
 EV24       object
dtype: object

### Air quality

![image.png](attachment:image.png)

In [25]:
air_quality = pd.read_csv(r'C:\Users\claud\data_final_project\Usefull\amsterdam-air-quality.csv')

In [26]:
newdate = []

for i in air_quality['date']:
    a = i.split('/')
    year = "{:04d}".format(int(a[0]))
    month = "{:02d}".format(int(a[1]))
    day = "{:02d}".format(int(a[2]))
    b = year + month + day
    newdate.append(b)
    
air_quality['newdate'] = newdate

In [27]:
air_quality = air_quality.sort_values(by=['newdate'], ascending=True)
air_quality

Unnamed: 0,date,pm25,pm10,o3,no2,so2,co,newdate
2664,2013/12/31,,,,30,,,20131231
2579,2014/1/1,,42,,32,,,20140101
2580,2014/1/2,,14,,28,,,20140102
2581,2014/1/3,,17,,26,,,20140103
2582,2014/1/4,,19,,28,,,20140104
...,...,...,...,...,...,...,...,...
2276,2021/11/12,,40,15,20,,3,20211112
2277,2021/11/13,,22,9,12,,3,20211113
2278,2021/11/14,,22,15,13,,3,20211114
2279,2021/11/15,,25,13,16,,3,20211115


In [37]:
air_quality.dtypes

date       object
 pm25      object
 pm10      object
 o3        object
 no2       object
 so2       object
 co        object
newdate    object
dtype: object

In [38]:
air_quality['newdate'] = air_quality['newdate'].astype(int)

In [39]:
air_quality.dtypes

date       object
 pm25      object
 pm10      object
 o3        object
 no2       object
 so2       object
 co        object
newdate     int32
dtype: object

In [42]:
a = weather['YYYYMMDD']
b = air_quality['newdate']
missing = []

for x in a:
    if x not in b:
        missing.append(x)
        
len(missing)

2879

In [35]:
missing

[20131231,
 20140101,
 20140102,
 20140103,
 20140104,
 20140105,
 20140106,
 20140107,
 20140108,
 20140109,
 20140110,
 20140111,
 20140112,
 20140113,
 20140114,
 20140115,
 20140116,
 20140117,
 20140118,
 20140119,
 20140120,
 20140121,
 20140122,
 20140123,
 20140124,
 20140125,
 20140126,
 20140127,
 20140128,
 20140129,
 20140130,
 20140131,
 20140201,
 20140202,
 20140203,
 20140204,
 20140205,
 20140206,
 20140207,
 20140208,
 20140209,
 20140210,
 20140211,
 20140212,
 20140213,
 20140214,
 20140215,
 20140216,
 20140217,
 20140218,
 20140219,
 20140220,
 20140221,
 20140222,
 20140223,
 20140224,
 20140225,
 20140226,
 20140227,
 20140228,
 20140301,
 20140302,
 20140303,
 20140304,
 20140305,
 20140306,
 20140307,
 20140308,
 20140309,
 20140310,
 20140311,
 20140312,
 20140313,
 20140314,
 20140315,
 20140316,
 20140317,
 20140318,
 20140319,
 20140320,
 20140321,
 20140322,
 20140323,
 20140324,
 20140325,
 20140326,
 20140327,
 20140328,
 20140329,
 20140330,
 20140331,