In [2]:
# Import our libraries
import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
import matplotlib.pyplot as plt
from sklearn.svm import SVC
import seaborn as sns
# Select seaborn's "ticks" plotting style
sns.set(style="ticks")

# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

# Read in the synthetic transaction dataset.
# The relative path uses forward slashes: they work on Windows, Linux and macOS,
# whereas a backslash is only a separator on Windows and '\G' is an
# unrecognized escape sequence inside a normal string literal.
mastercard = pd.read_csv('MasterCardData/GeoInsights_Synthetic_Output.csv')

# Take a look at the first few rows of the dataset
mastercard.head()

Unnamed: 0,txn_date,industry,quad_id,txn_amt,txn_cnt,acct_cnt,avg_ticket,avg_freq,avg_spend_amt,yoy_txn_amt,yoy_txn_cnt
0,2022-01-01,Total Retail,120230021323230,163.34,335.39,381.79,26.04,99.97,40.81,97.32,37.49
1,2022-01-01,Total Retail,120230023131110,400.82,474.7,462.13,120.85,104.33,106.34,-50.11,-75.64
2,2022-01-01,Eating Places,120230023131110,263.75,241.53,240.95,82.35,120.39,133.75,-31.29,-31.88
3,2022-01-01,Total Apparel,120230023131110,162.59,229.95,252.82,127.35,98.36,111.11,-16.39,-51.05
4,2022-01-01,Total Retail,120230210333222,171.27,128.84,131.45,425.06,100.94,429.76,212.5,-41.44


In [14]:
# Read in the quad geo-information table ('|'-delimited file).
# Forward slashes keep the relative path portable across operating systems and
# avoid the unrecognized '\G' escape sequence of the backslash form.
geo_data = pd.read_csv('MasterCardData/GeoInsights_Hackathon_Quads_GeoInfo.csv', delimiter='|')
geo_data.head()

Unnamed: 0,quad_id,central_latitude,central_longitude,bounding_box
0,120230013313003,48.155093,13.991089,"POLYGON ((13.99658203125 48.15142814322122, 13..."
1,120230021330223,47.580231,12.496948,"POLYGON ((12.50244140625 47.57652571374621, 12..."
2,120230023133222,47.282955,12.573853,"POLYGON ((12.579345703125 47.27922900257082, 1..."
3,120230123333302,47.058896,15.430298,"POLYGON ((15.435791015625 47.05515408550347, 1..."
4,120221133132132,47.312759,11.14563,"POLYGON ((11.151123046875 47.30903424774781, 1..."


In [15]:
# Haversine formula to calculate distance between two lat-lon points
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in kilometers between two points.

    All four arguments are angles in degrees, longitude first:
    ``(lon1, lat1)`` is the first point and ``(lon2, lat2)`` the second.
    The Earth is modelled as a sphere of radius 6371.0 km. Only NumPy
    functions and plain arithmetic are applied to the inputs.
    """
    earth_radius_km = 6371.0

    # The trigonometric functions below expect radians
    lam1 = np.radians(lon1)
    phi1 = np.radians(lat1)
    lam2 = np.radians(lon2)
    phi2 = np.radians(lat2)

    # Half of the angular separation along each axis
    half_dlon = (lam2 - lam1) / 2
    half_dlat = (phi2 - phi1) / 2

    # Haversine of the central angle, built from its two contributions
    lat_term = np.sin(half_dlat)**2
    lon_term = np.cos(phi1) * np.cos(phi2) * np.sin(half_dlon)**2
    hav = lat_term + lon_term

    # Central angle between the points, then arc length on the sphere
    central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))
    return earth_radius_km * central_angle

# Function to filter the dataset
def filter_by_radius(dataframe, event_location, radius):
    """Keep the rows of ``dataframe`` that lie within ``radius`` km of an event.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain 'central_latitude' and 'central_longitude' columns
        holding numeric angles in degrees.
    event_location : tuple
        (latitude, longitude) of the event, in degrees.
    radius : float
        Search radius in kilometers; rows exactly at this distance are kept.

    Returns
    -------
    pandas.DataFrame
        The rows whose haversine distance to ``event_location`` is less than
        or equal to ``radius``, with their original index labels.

    Raises
    ------
    KeyError
        If either coordinate column is missing from ``dataframe``.
    """
    event_latitude, event_longitude = event_location[0], event_location[1]

    # haversine() only uses NumPy functions and arithmetic, so the whole
    # coordinate columns are passed at once instead of calling it row by row.
    distances = haversine(
        event_longitude,
        event_latitude,
        dataframe['central_longitude'].to_numpy(dtype=float, na_value=np.nan),
        dataframe['central_latitude'].to_numpy(dtype=float, na_value=np.nan),
    )

    # A missing coordinate gives a NaN distance, which compares False and is
    # therefore excluded, exactly like any row farther away than the radius.
    return dataframe[distances <= radius]

In [25]:
# Calculate the middle point of Point A and Point B
def get_middlepoint(point_a, point_b):
    """Return the arithmetic midpoint of two points as (latitude, longitude).

    Index 0 of each input is averaged as the longitude and index 1 as the
    latitude, i.e. the inputs are read as ``(longitude, latitude)`` pairs,
    while the result comes back in the opposite order,
    ``(latitude, longitude)``. Callers holding ``(latitude, longitude)``
    tuples must swap the components before calling.
    """
    lon_a, lat_a = point_a[0], point_a[1]
    lon_b, lat_b = point_b[0], point_b[1]

    # Plain component-wise average; note the swapped order of the result
    return ((lat_a + lat_b) / 2, (lon_a + lon_b) / 2)

In [37]:
# Event coordinates, each stored as a (latitude, longitude) tuple, which is the
# order filter_by_radius expects for its event_location argument.
adidas_location = (47.169753170157186, 13.106657266616821)
# get_middlepoint reads each endpoint as (longitude, latitude) and returns
# (latitude, longitude), so the endpoints are passed longitude-first here.
# Passing them latitude-first would produce a swapped (longitude, latitude)
# midpoint and move the event to roughly latitude 13, longitude 47.
snowboard_location = get_middlepoint((13.132610321044922, 47.110209967822342), (13.122954368591309, 47.106588081374838))
red_bull_location = get_middlepoint((13.133828043937683, 47.112542887152543), (13.133377432823181, 47.115237117435001))
ski_location = (47.062163427549223, 13.058323860168455)

In [43]:
# Search radius around each event, in kilometers
R = 10

# One filtered copy of geo_data per event, evaluated in the order listed
(
    ski_filtered_df,
    snowboard_filtered_df,
    red_bull_filtered_df,
    adidas_filtered_df,
) = (
    filter_by_radius(geo_data, location, R)
    for location in (
        ski_location,
        snowboard_location,
        red_bull_location,
        adidas_location,
    )
)

In [44]:
# Display the geo_data rows that filter_by_radius kept for the ski event
ski_filtered_df

Unnamed: 0,quad_id,central_latitude,central_longitude,bounding_box
92,120230032321213,47.118738,13.134155,"POLYGON ((13.1396484375 47.11499982620772, 13...."
550,120230032321212,47.118738,13.123169,"POLYGON ((13.128662109375 47.11499982620772, 1..."
1010,120230032323012,47.088825,13.123169,"POLYGON ((13.128662109375 47.08508535995384, 1..."
1064,120230032321231,47.111261,13.134155,"POLYGON ((13.1396484375 47.10752278534249, 13...."
2305,120230032321300,47.126213,13.145142,"POLYGON ((13.150634765625 47.12247581664114, 1..."
2384,120230032321302,47.118738,13.145142,"POLYGON ((13.150634765625 47.11499982620772, 1..."
2668,120230032321230,47.111261,13.123169,"POLYGON ((13.128662109375 47.10752278534249, 1..."
3706,120230032321211,47.126213,13.134155,"POLYGON ((13.1396484375 47.12247581664114, 13...."
3730,120230032321233,47.103784,13.134155,"POLYGON ((13.1396484375 47.1000446940252, 13.1..."


In [22]:
import geopandas as gpd

# Load the GeoJSON file into a GeoDataFrame.
# Forward slashes keep the relative path portable across operating systems and
# avoid the unrecognized '\T', '\E' and '\G' escape sequences of the backslash form.
geojson_path = 'InveniumData/TT Festival_Invenium/Events Gastein/Gastein_sites.geojson'
gdf = gpd.read_file(geojson_path)

# Function to extract latitude and longitude based on geometry type
def extract_lat_lon(geometry):
    """Return a geometry's coordinates as a Series labelled latitude/longitude.

    A geometry whose ``geom_type`` is 'Point' contributes its own ``y`` and
    ``x``; any other geometry contributes the ``y`` and ``x`` of its
    ``centroid``. ``y`` is stored under 'latitude' and ``x`` under 'longitude'.
    """
    # Points are used as they are; everything else is reduced to its centroid
    anchor = geometry if geometry.geom_type == 'Point' else geometry.centroid
    return pd.Series({'latitude': anchor.y, 'longitude': anchor.x})

# Compute the coordinates feature by feature and store them as two new columns
gdf[['latitude', 'longitude']] = gdf.apply(
    lambda feature: extract_lat_lon(feature['geometry']),
    axis=1,
)

# Preview each geometry next to its extracted coordinates
gdf[['geometry', 'latitude', 'longitude']].head()

Unnamed: 0,geometry,latitude,longitude
0,POINT (13.10666 47.16975),47.169753,13.106657
1,"LINESTRING (13.13261 47.11021, 13.12295 47.10659)",47.108399,13.127782
2,"LINESTRING (13.13383 47.11254, 13.13338 47.11524)",47.11389,13.133603
3,POINT (13.05832 47.06216),47.062163,13.058324
