In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

In [8]:
# Load the weather observations; the path is relative to the notebook's working directory.
WEATHER_CSV = 'AucklandWeather.csv'
weather = pd.read_csv(WEATHER_CSV)

In [9]:
# Preview the first five rows (five is the default for DataFrame.head).
weather.head()

Unnamed: 0,date,temperature,relative_humidity,wind_direction_deg,wind_speed_knots,gust,Forecast
0,1/09/2019 0:00,14,67.12,210,5,0,cloudy
1,1/09/2019 0:30,14,67.12,210,5,0,cloudy
2,1/09/2019 1:00,14,71.83,200,6,0,fine
3,1/09/2019 1:30,15,62.92,210,7,0,fine
4,1/09/2019 2:00,15,62.92,180,6,0,fine


In [10]:
# Columns used as clustering inputs.
features = [
    'temperature',
    'relative_humidity',
    'wind_direction_deg',
    'wind_speed_knots',
]
# Pull the selected columns out as a NumPy array (one row per observation).
dt = weather[features].to_numpy()

In [11]:
# Standardize each feature column with StandardScaler.
# Note: this rebinds `dt`, so the unscaled array is no longer available afterwards.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
dt = scaler.fit_transform(dt)

In [12]:
# `sklearn.datasets.samples_generator` was deprecated in scikit-learn 0.22 and removed
# in 0.24; `make_blobs` is exposed by the public `sklearn.datasets` package, which
# works on both old and current releases.
from sklearn.datasets import make_blobs
from sklearn.neighbors import NearestNeighbors
import seaborn as sns
sns.set()


In [13]:
# Cluster analysis with DBSCAN
from scipy import stats
from sklearn.cluster import DBSCAN

# Rule of thumb: for data with D dimensions, choose minPts >= D + 1.
n_dims = dt.shape[1]
min_samples = n_dims + 1

dbscan = DBSCAN(eps=3.5, min_samples=min_samples)
dbscan.fit(dt)
# One cluster label per row of `dt`; scikit-learn's DBSCAN labels noise points -1.
dblabel = dbscan.labels_

In [14]:
#visualise the result

from sklearn.manifold import TSNE
import seaborn as sns
import mpl_toolkits.mplot3d.axes3d as p3
from matplotlib import animation

%matplotlib inline 

In [15]:
def prepare_tsne(n_components, data, kmeans_labels, random_state=None):
    """Embed `data` with t-SNE and return the embedding as a labelled DataFrame.

    Parameters
    ----------
    n_components : int
        Number of embedding dimensions; must be 1, 2 or 3 because the output
        columns are named 'x', 'y', 'z' in that order.
    data : array-like
        Input passed unchanged to ``TSNE.fit_transform``.
    kmeans_labels : array-like
        One label per row of `data`; stored in the 'labels' column. Despite the
        parameter name, any cluster labels can be passed.
    random_state : int or None, optional
        Forwarded to ``TSNE`` so the embedding can be made reproducible.
        The default (None) keeps the original unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        The first `n_components` of the columns 'x', 'y', 'z', plus 'labels'.

    Raises
    ------
    ValueError
        If `n_components` is not between 1 and 3.
    """
    names = ['x', 'y', 'z']
    # Fail fast, before the expensive t-SNE fit, when there are not enough axis names.
    if not 1 <= n_components <= len(names):
        raise ValueError(
            'n_components must be between 1 and {}, got {!r}'.format(len(names), n_components)
        )

    matrix = TSNE(n_components=n_components, random_state=random_state).fit_transform(data)
    # Name the columns at construction time instead of renaming in place.
    df_matrix = pd.DataFrame(matrix, columns=names[:n_components])
    df_matrix['labels'] = kmeans_labels

    return df_matrix

def plot_tsne(tnse_data, kmeans_labels):
    """Draw a 2-D scatter plot of an embedding, coloured by label.

    Parameters
    ----------
    tnse_data : array-like
        Embedding converted to a DataFrame; its columns 0 and 1 become the
        x and y coordinates. The parameter keeps its existing spelling so
        keyword callers continue to work.
    kmeans_labels : array-like
        One label per row, used as the point hue.
    """
    # Read the argument itself: the body previously referenced `tsne_data`, a name
    # that is not the parameter and is only resolvable if a global of that name exists.
    df_tsne = pd.DataFrame(tnse_data).rename({0: 'x', 1: 'y'}, axis=1)
    df_tsne['z'] = kmeans_labels
    sns.scatterplot(x=df_tsne.x, y=df_tsne.y, hue=df_tsne.z, palette="Set2")
    plt.show()
    
def plot_3d(df, name='labels'):
    """Show a Plotly 3-D scatter of the 'x', 'y' and 'z' columns of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'x', 'y', 'z' and the column named by `name`.
    name : str, optional
        Column used to colour the points (default 'labels').
    """
    # The unused `px.data.iris()` load that used to sit here was dropped: its
    # result was never read, so it only added work to every call.
    fig = px.scatter_3d(df, x='x', y='y', z='z',
                        color=name, opacity=0.5)

    fig.update_traces(marker=dict(size=3))
    fig.show()
    
def plot_animation(df, label_column, name):
    """Render a rotating 3-D scatter of `df` and save it as '<name>.gif'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'x', 'y', 'z' and `label_column`.
    label_column : str
        Column whose values colour the points.
    name : str
        Output file stem; the animation is written to '{name}.gif' using the
        'imagemagick' writer.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # Plot the frame passed in; the body previously read a global `tsne_3d_df`
    # and ignored the `df` argument entirely.
    ax.scatter(df['x'], df['y'], df['z'], c=df[label_column],
               s=6, depthshade=True, cmap='Paired')
    # The z and x limits are fixed values, not derived from the data.
    ax.set_zlim(-15, 25)
    ax.set_xlim(-20, 20)
    plt.tight_layout()

    def update(num):
        # Called once per frame with the frame number as the second view_init argument.
        ax.view_init(200, num)

    n_frames = 360
    ani = animation.FuncAnimation(fig, update, n_frames, blit=False, interval=50)
    ani.save('{}.gif'.format(name), writer='imagemagick')
    plt.show()

In [16]:
# Embed the standardized features in 3-D with t-SNE and attach the DBSCAN labels.
ts = prepare_tsne(n_components=3, data=dt, kmeans_labels=dblabel)

In [None]:
# Number of feature columns in the 2-D array `dt`.
_, n_features = dt.shape
n_features

Note: you may need to restart the kernel to use updated packages.
