# PANDAS

In [42]:
import pandas as pd
import numpy as np

In [43]:
# Toy weather table, stored as one list per column.
# The -999 wind speed stands in for a missing reading; it is swapped for NaN
# further down in the notebook.
data = dict(
    DOY=[1, 2, 3],
    Wind_Speed=[2.2, 3.1, -999],
    Wind_Direction=['E', 'NW', 'NW'],
    Precipitation=[0, 18, 25],
)

data

{'DOY': [1, 2, 3],
 'Wind_Speed': [2.2, 3.1, -999],
 'Wind_Direction': ['E', 'NW', 'NW'],
 'Precipitation': [0, 18, 25]}

In [44]:
# Build the DataFrame from the column dictionary: each key becomes a column.
df = pd.DataFrame(data=data)
# Preview only the first two rows.
df.head(n=2)

Unnamed: 0,DOY,Wind_Speed,Wind_Direction,Precipitation
0,1,2.2,E,0
1,2,3.1,NW,18


In [45]:
# Column labels of the frame, returned as an Index object.
df.columns

Index(['DOY', 'Wind_Speed', 'Wind_Direction', 'Precipitation'], dtype='object')

In [46]:
# Data type of each column (here the text column Wind_Direction shows up as `object`).
df.dtypes

DOY                 int64
Wind_Speed        float64
Wind_Direction     object
Precipitation       int64
dtype: object

In [47]:
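# Attributes (properties) vs. methods: an attribute is read without parentheses (a.size); a method is called with them (a.sum()).
# Analogy: john.age is an attribute (a fact about john); john.makecoffee() is a method (something john does).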

In [48]:
df['Wind_Speed'] # select a single column by its label using bracket syntax

0      2.2
1      3.1
2   -999.0
Name: Wind_Speed, dtype: float64

In [49]:
df.Wind_Speed # same column via attribute (dot) access; gives the same result as df['Wind_Speed']

0      2.2
1      3.1
2   -999.0
Name: Wind_Speed, dtype: float64

In [50]:
# Position-based selection: the first two rows and the first two columns.
# As with range(), the stop position is excluded.
df.iloc[:2, :2]

Unnamed: 0,DOY,Wind_Speed
0,1,2.2
1,2,3.1


In [51]:
# Label-based selection: unlike iloc (which behaves like range), a .loc slice
# INCLUDES its end label, so rows 0, 1 and 2 are all returned.
df.loc[:2, ['Precipitation', 'Wind_Speed']]

Unnamed: 0,Precipitation,Wind_Speed
0,0,2.2
1,18,3.1
2,25,-999.0


In [52]:
# Boolean (logical) indexing: build a True/False mask, then keep only the
# rows where the mask is True.

idx = df['Wind_Speed'].gt(3)
print(idx)

df.loc[idx]

0    False
1     True
2    False
Name: Wind_Speed, dtype: bool


Unnamed: 0,DOY,Wind_Speed,Wind_Direction,Precipitation
1,2,3.1,NW,18


In [56]:
# Flag every cell, in any column, that holds the -999 missing-data sentinel.
idx_missing = df.isin(values=[-999])
# Show the frame with everything except the flagged cells blanked out (NaN).
df.where(idx_missing)

Unnamed: 0,DOY,Wind_Speed,Wind_Direction,Precipitation
0,,,,
1,,,,
2,,-999.0,,


In [58]:
# Overwrite every flagged cell with NaN ("Not a Number"), the marker for
# missing data. NaN prints like the string 'NaN' but is treated specially.
# NOTE(review): this modifies df in place, so every cell run after this one
# sees NaN instead of -999.
df[idx_missing] = np.nan
df

Unnamed: 0,DOY,Wind_Speed,Wind_Direction,Precipitation
0,1,2.2,E,0
1,2,3.1,NW,18
2,3,,NW,25


In [59]:
# Boolean frame that is True wherever a value is missing (NaN).
df.isna()

Unnamed: 0,DOY,Wind_Speed,Wind_Direction,Precipitation
0,False,False,False,False
1,False,False,False,False
2,False,True,False,False


In [60]:
# Fill the missing wind speeds with the mean of the observed wind speeds.
# The fill value is keyed by column name, so a NaN in any OTHER column is left
# alone instead of being silently replaced by a wind-speed average.
# Series.mean() skips NaN by default, so only real readings are averaged.
# fillna returns a new frame; df itself keeps its NaN values.
df2 = df.fillna({'Wind_Speed': df['Wind_Speed'].mean()})

df2

Unnamed: 0,DOY,Wind_Speed,Wind_Direction,Precipitation
0,1,2.2,E,0
1,2,3.1,NW,18
2,3,2.65,NW,25


In [61]:
# Three consecutive calendar days starting on 1 January 2019
# (date_range steps one day at a time by default).
dates = pd.date_range(start='20190101', periods=3)
dates

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03'], dtype='datetime64[ns]', freq='D')

In [67]:
# Attach the timestamps as a new 'Dates' column (one date per row).
# NOTE(review): the saved output below also lists a lowercase `dates` column
# that no visible cell creates -- presumably left over from an earlier run of
# this cell; restart the kernel and re-run to confirm it disappears.
df2 = df2.assign(Dates=dates)
df2

Unnamed: 0,DOY,Wind_Speed,Wind_Direction,Precipitation,dates,Dates
0,1,2.2,E,0,2019-01-01,2019-01-01
1,2,3.1,NW,18,2019-01-02,2019-01-02
2,3,2.65,NW,25,2019-01-03,2019-01-03


In [53]:
# Number of non-missing values in each column.
# NOTE(review): the execution count (In [53]) suggests this cell ran before
# -999 was replaced with NaN (In [58]); a fresh top-to-bottom run would
# presumably report 2 for Wind_Speed rather than the 3 shown below.
df.count()

DOY               3
Wind_Speed        3
Wind_Direction    3
Precipitation     3
dtype: int64

In [54]:
# Quick per-column summaries: highest and lowest wind speed, running
# precipitation total, and the distinct wind directions.
# NOTE(review): the saved output (min of -999.0) comes from In [54], i.e. from
# before the sentinel was replaced with NaN in In [58]; re-running the notebook
# in order would presumably give a different minimum.
print(df.Wind_Speed.max())
print(df.Wind_Speed.min())
print(df.Precipitation.cumsum())
print(df.Wind_Direction.unique())

3.1
-999.0
0     0
1    18
2    43
Name: Precipitation, dtype: int64
['E' 'NW']


In [55]:
# Total precipitation via the vectorized Series method rather than Python's
# built-in sum(), which iterates over the values one at a time.
# Series.sum() also skips NaN by default, whereas built-in sum() would
# return nan if any value were missing.
df['Precipitation'].sum()

43