In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"


# Common imports
import numpy as np
import os
import pandas as pd

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "end_to_end_project"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

In [2]:
tropics = pd.read_csv('tropical_storms.csv')

In [3]:
canes = pd.read_csv('Continental United States Hurricane Impacts:Landfalls 1851-2021.csv')

In [4]:
tropics.columns

Index(['storm_number', 'date', 'time', 'lat_n', 'lon_w', 'max_winds(kt)',
       'landfall_state', 'storm_names'],
      dtype='object')

In [5]:
canes.columns

Index(['year', 'month', 'affected_states', 'category', 'mb_pressure',
       'wind_kt', 'name'],
      dtype='object')

In [6]:
tropics.shape

(392, 8)

In [7]:
canes.shape

(338, 7)

In [8]:
tropics.head()

Unnamed: 0,storm_number,date,time,lat_n,lon_w,max_winds(kt),landfall_state,storm_names
0,6,10/19/1851,1500,41.1,71.7,50,NY,
1,3,8/19/1856,1100,34.8,76.4,50,NC,
2,4,9/30/1857,1000,25.8,97.0,50,TX,
3,3,9/14/1858,1500,27.6,82.7,60,FL,
4,3,9/16/1858,300,35.2,75.2,50,NC,


### find missing values

### find outliars

In [9]:
tropics.dtypes

storm_number        int64
date               object
time               object
lat_n             float64
lon_w             float64
max_winds(kt)       int64
landfall_state     object
storm_names        object
dtype: object

* remove Z from time
* concat date with time
* convert to datetime

In [10]:
# date_time = tropics["date"] + " " + tropics["time"]

In [11]:
# tropics = tropics.append(date_time , ignore_index = True)

In [12]:
tropics jkhg

SyntaxError: invalid syntax (3741309041.py, line 1)

In [None]:
tropics.loc[['storm_number', 'date']]


# df.loc[['viper', 'sidewinder']]


In [None]:
topics['DateTime'] = pd.to_datetime(df['DateTime'])

In [None]:
louisiana = tropics[tropics['landfall_state'] == 'LA']

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
tropics.hist(bins=50, figsize=(20,15))
save_fig("attribute_histogram_plots")
plt.show()


In [None]:
# to make this notebook's output identical at every run
np.random.seed(42)

In [None]:
from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(tropics, test_size=0.2, random_state=42)



In [None]:
test_set.head()

In [None]:
tropics["max_winds(kt)"].hist()

In [None]:
tropics.dtypes

In [None]:
tropics['lon_w'].astype(float)

In [None]:
tropics.plot(kind="scatter", x="lon_w", y="lat_n")
save_fig("bad_visualization_plot")

In [None]:
tropics.plot(kind="scatter", x="lon_w", y="lat_n", alpha=0.1)
save_fig("better_visualization_plot")

In [None]:
tropics.plot(kind="scatter", x="lon_w", y="lat_n", alpha=0.8,
             s=tropics["max_winds(kt)"]/100, label="max_winds(kt)", figsize=(10,5),
             c="max_winds(kt)", cmap=plt.get_cmap("jet"), colorbar=True,
             sharex=False)
plt.legend()
save_fig("housing_prices_scatterplot")
