In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [2]:
def get_pca(X):
    """
    Transform data to 2D points for plotting. Should return an array with shape (n, 2).
    """
    flatten_model = make_pipeline(
        MinMaxScaler(),
        PCA(2)
    )
    X2 = flatten_model.fit_transform(X)
    assert X2.shape == (X.shape[0], 2)
    return X2

In [3]:
def get_clusters(X):
    """
    Find clusters of the weather data.
    """
    model = make_pipeline(
        KMeans(n_clusters=10)
    )
    model.fit(X)
    return model.predict(X)

In [4]:
def main():
    data = pd.read_csv("monthly-data-labelled.csv")

    X = data.iloc[:,1:].values
    y = data['city'].values
    
    X2 = get_pca(X)
    clusters = get_clusters(X)
    plt.scatter(X2[:, 0], X2[:, 1], c=clusters, cmap='tab10', edgecolor='k', s=20)
    plt.savefig('clusters.png')

    df = pd.DataFrame({
        'cluster': clusters,
        'city': y,
    })
    counts = pd.crosstab(df['city'], df['cluster'])
    print(counts)

In [5]:
if __name__ == '__main__':
    main()

cluster          0   1   2   3   4   5   6   7   8   9
city                                                  
Anchorage        0   4   4  19  12   0   0  17   0   0
Atlanta          0   0   0   0   0  45   2   0   0   0
Atlantic City    0   0   0   0   0  38   0   0   7   0
Calgary          0   2   0   0   9   0   0   0   4  37
Chicago          0   1   0   0   0   1   0   0   2  48
Denver           0   0   0   0   0   9   0   0   0   0
Edmonton         0  14   0   3  21   0   0   9   0   4
Gander           0  13   9   7   1   0   0  19   0   2
Halifax          0  12   0   0   0   0   0   0   0  38
London           0   4   0   0   0   0   0   0   0  38
Los Angeles     39   0   0   0   0   0   0   0   0   0
Miami            0   0   0   0   0   0  43   0   0   0
Montreal         0  14   2   2   2   0   0   7   0   1
New Orleans      0   0   0   0   0   0  45   0   0   0
Ottawa           0  23   2   3   7   0   0  10   0   6
Portland         0   0   0   0   0   7   0   0  31   0
Québec    