In [1]:
# Dependencies
import os

import pandas as pd

# Hawaii Measurements

In [2]:
# Relative path to the raw Hawaii measurements CSV
measurements_path = "raw_data/hawaii_measurements.csv"

# Load the CSV into a DataFrame and preview its first five rows
measure_df = pd.read_csv(filepath_or_buffer=measurements_path, encoding="utf-8")
measure_df.head(n=5)

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [3]:
# Per-column flag: True when the column holds at least one missing value
measure_df.isna().any(axis=0)

station    False
date       False
prcp        True
tobs       False
dtype: bool

In [4]:
# Build a row mask that is True wherever any column is missing,
# then keep only those rows and preview them
rows_with_nan = measure_df.isna().any(axis=1)
null_measure = measure_df.loc[rows_with_nan]
null_measure.head()

Unnamed: 0,station,date,prcp,tobs
4,USC00519397,2010-01-06,,73
26,USC00519397,2010-01-30,,70
29,USC00519397,2010-02-03,,67
43,USC00519397,2010-02-19,,63
61,USC00519397,2010-03-11,,73


In [5]:
# Non-null count per column: a column whose count is lower than the
# others (here prcp) is short by its number of missing values
measure_df.notna().sum()

station    19550
date       19550
prcp       18103
tobs       19550
dtype: int64

In [6]:
# Discard every row containing a NaN, then renumber the index from 0
# (drop=True throws the old index away instead of keeping it as a column)
clean_measure = measure_df.dropna().reset_index(drop=True)

# Verify that every column now has the same non-null count
clean_measure.count()

station    18103
date       18103
prcp       18103
tobs       18103
dtype: int64

In [7]:
# Save the cleaned measurements as csv.
# to_csv does not create missing parent directories, so make sure the
# output folder exists first (a fresh checkout may not have it yet).
clean_measurements_path = "clean_data/clean_hawaii_measurements.csv"
os.makedirs(os.path.dirname(clean_measurements_path), exist_ok=True)

# index=False keeps the DataFrame index out of the written file
clean_measure.to_csv(clean_measurements_path, encoding="utf-8", index=False)
clean_measure.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-07,0.06,70


# Hawaii Stations

In [8]:
# Relative path to the raw Hawaii stations CSV
stations_path = "raw_data/hawaii_stations.csv"

# Load the CSV into a DataFrame and preview its first five rows
station_df = pd.read_csv(filepath_or_buffer=stations_path, encoding="utf-8")
station_df.head(n=5)

Unnamed: 0,station,name,latitude,longitude,elevation
0,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6


In [9]:
# Per-column flag: True when the column holds at least one missing value
station_df.isna().any(axis=0)

station      False
name         False
latitude     False
longitude    False
elevation    False
dtype: bool

In [10]:
# The stations data needs no cleaning, so the "clean" frame has the same content.
# Take an independent copy rather than a second name for the same object,
# so later in-place edits to clean_station cannot silently alter station_df.
clean_station = station_df.copy()

In [11]:
# Save the stations data as csv.
# to_csv does not create missing parent directories, so make sure the
# output folder exists first (a fresh checkout may not have it yet).
clean_stations_path = "clean_data/clean_hawaii_stations.csv"
os.makedirs(os.path.dirname(clean_stations_path), exist_ok=True)

# index=False keeps the DataFrame index out of the written file
clean_station.to_csv(clean_stations_path, encoding="utf-8", index=False)
clean_station.head()

Unnamed: 0,station,name,latitude,longitude,elevation
0,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
