In [None]:
import fiona
import geopandas as gpd
import pandas as pd
# Enable fiona's KML driver for reading and writing. The flag is set on fiona's
# own driver registry instead of reaching through `gpd.io.file.fiona`, which is
# a geopandas implementation detail and is not populated on newer releases.
fiona.drvsupport.supported_drivers['KML'] = 'rw'

# Load every feature stored in the KML file into a single GeoDataFrame.
all_data = gpd.read_file('chapter_10_data.kml')
all_data

In [None]:
# Draw every feature on one large map; alpha=0.1 makes each shape mostly transparent.
all_data.plot(alpha=0.1, figsize=(15, 15))

In [None]:
# Label-based slice: rows 1 through 7 (both ends included), all columns.
# Presumably these rows hold the reference polygons -- verify against the KML.
info_polygons = all_data.loc[1:7]
info_polygons


In [None]:
# Label-based slice: every row from label 8 onwards, all columns.
# Presumably these rows hold the itinerary lines -- verify against the KML.
itineraries = all_data.loc[8:]
itineraries


In [None]:
from shapely.geometry.point import Point

# Split each itinerary line into its vertices, as we want to treat them as
# individual points. One output record per vertex:
#   client_id -> index label of the itinerary row
#   target    -> the itinerary's Name
#   point     -> 2D shapely Point (x, y) of the vertex
results = []

for client_id, row in itineraries.iterrows():
    # Read the vertices straight from the geometry instead of parsing its WKT
    # text: this does not depend on how the Z value is printed and works the
    # same whether or not the line carries a Z coordinate at all.
    for coord in row.geometry.coords:
        # Only x and y are kept; any Z component is dropped.
        results.append([client_id, row.Name, Point(coord[0], coord[1])])

# Passing `columns` to the constructor keeps this working for an empty result.
results_df = pd.DataFrame(results, columns=['client_id', 'target', 'point'])
results_df


In [None]:
import geopandas as gpd

# Promote the point table to a GeoDataFrame using the 'point' column as geometry.
# The points were extracted from `all_data`, so they share its coordinate
# reference system; declaring it keeps the later spatial join CRS-consistent.
gdf = gpd.GeoDataFrame(results_df, geometry='point', crs=all_data.crs)
gdf


In [None]:
# Left spatial join: every point row is kept; the polygon columns are filled in
# where the join finds a match and left missing otherwise.
joined_data = gpd.sjoin(
    left_df=gdf,
    right_df=info_polygons,
    how='left',
)
joined_data


In [None]:
# Flag the points whose joined polygon Name is missing, then count those
# unmatched points per client.
joined_data['na'] = joined_data['Name'].isna()
joined_data.groupby('client_id')['na'].sum()


In [None]:
# Drop the points that did not match any polygon. Only 'Name' is checked, so a
# matched point is not discarded merely because some other column (for example
# a polygon attribute) happens to be empty.
joined_data = joined_data.dropna(subset=['Name'])
joined_data


In [None]:
# Count, for every client (rows), how many of its points matched each polygon
# Name (columns); client/polygon combinations with no points become 0.
location_behavior = joined_data.pivot_table(
    index='client_id',
    columns='Name',
    values='target',
    aggfunc='count',
).fillna(0)
location_behavior


In [None]:
# Turn the per-client counts into proportions: divide every row by its own total.
row_totals = location_behavior.sum(axis=1)
location_behavior = location_behavior.div(row_totals, axis=0)
location_behavior


In [None]:
# Feature matrix as a plain array: one row per client_id, one column per
# polygon Name, taken from the location_behavior table.
X = location_behavior.values
X


In [None]:
# Target labels, selected and ordered by the client_ids that are actually
# present in location_behavior. Clients whose points were all dropped have no
# row in X, so taking Name from every itinerary would misalign X and y.
y = itineraries.loc[location_behavior.index, 'Name'].values
y


In [None]:
# Hold out 33% of the samples for testing. The split is stratified on y and
# seeded so that it is reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
    stratify=y,
)


In [None]:
# Fit a logistic regression with default settings on the training split.
from sklearn.linear_model import LogisticRegression

my_lr = LogisticRegression()
my_lr.fit(X=X_train, y=y_train)


In [None]:
# Predicted labels of the logistic regression for the held-out samples.
preds = my_lr.predict(X=X_test)
preds


In [None]:
# True vs. predicted labels side by side (the author observed a single
# misclassification for the logistic regression here).
pd.DataFrame(dict(real=y_test, pred=preds))


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Build the label list once and use it for both the matrix and the tick labels.
# A bare set(y_test) has no guaranteed order and misses labels that occur only
# in the predictions, so the axis labels could end up on the wrong rows/columns.
class_labels = sorted(set(y_test) | set(preds))

conf_mat = confusion_matrix(y_test, preds, labels=class_labels, normalize=None)
conf_mat_plot = ConfusionMatrixDisplay(conf_mat, display_labels=class_labels)
conf_mat_plot.plot()


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the same training split. The seed matches the one used
# for the train/test split so that repeated runs give the same tree.
my_dt = DecisionTreeClassifier(random_state=42)
my_dt.fit(X_train, y_train)

# NOTE: this overwrites the logistic-regression predictions stored in `preds`.
preds = my_dt.predict(X_test)

# True vs. predicted labels side by side for the decision tree.
pd.DataFrame({'real': y_test, 'pred': preds})


In [None]:
# Confusion matrix for the predictions currently held in `preds`.
# The same explicit, sorted label list drives both the matrix and the tick
# labels; a bare set(y_test) has no guaranteed order and misses labels that
# occur only in the predictions.
class_labels = sorted(set(y_test) | set(preds))

conf_mat = confusion_matrix(y_test, preds, labels=class_labels, normalize=None)
conf_mat_plot = ConfusionMatrixDisplay(conf_mat, display_labels=class_labels)
conf_mat_plot.plot()
