In [1]:
import pandas as pd
import geopandas as gpd
import backend_codes.load_subsets as ls

# This notebook calculates statistics related to land use

In [2]:
# Rolling Window Parameters
# `start` / `end` bound the tweet subset passed to ls.load_and_subset; `start`
# is later converted to int and used as the row label in the statistics CSV.
# NOTE(review): the values look like YYYYMMDD date strings - confirm the
# format that load_and_subset expects.
start = "20200406"
end = "20200409"
# Window-size label; becomes part of the statistics file name.
denom = "3days"
# NOTE(review): not referenced anywhere else in this notebook - presumably
# kept for parity with sibling notebooks; confirm before removing.
allow_even_subsets = False
# Forwarded as `samp_size` to load_and_subset. When it is an int, it is also
# appended to the statistics file name; False leaves the name unchanged.
sam = False
# Forwarded unchanged to load_and_subset (keyword `del_one_tweeters`).
del_one_tweeters = True

In [3]:
# Path (relative to data/) of the statistics CSV for the chosen window size.
ref_stem = "statistics_notebookdemo/" + denom + "_overlap"
# Sampled runs get their own file: the sample size is appended to the name.
# The exact-type check is deliberate: `sam = False` means "no sampling", and
# isinstance(False, int) would be True.
if type(sam) is int:
    ref_stem += "_" + str(sam)
# The extension is added last, so a ".csv" that happens to occur inside
# `denom` can no longer truncate the name (the previous split('.csv')[0]
# cut the path at the first occurrence).
ref = ref_stem + ".csv"

In [4]:
# Location of the statistics CSV that this notebook reads and later extends.
path = f"data/{ref}"
# The 'start_date' column becomes the index, so each row is addressed by its
# start date. Read through `path` rather than repeating the f-string, so the
# two can never drift apart.
file = pd.read_csv(path, index_col='start_date')

In [5]:
# Read the land-use / land-cover shapefile and reproject it to EPSG:4326,
# the same CRS that is assigned to the tweet geometries further down.
landuse = gpd.read_file('data/shps/land_use_land_cover.shp').to_crs(4326)

In [6]:
# Start every land-use row at zero tweets; count_points overwrites the value
# for each row it is applied to.
landuse['counts'] = 0

### Load tweets and handle geometries

In [7]:
# Load the tweets for the configured window using the parameters set above.
tweets_raw = ls.load_and_subset(
    start,
    end,
    del_one_tweeters=del_one_tweeters,
    samp_size=sam,
    tweets_path="data/tweets/preprocessed_tweets_with_poi_location.csv",
)
# Parse the WKT strings in the 'wkt' column into geometry objects ...
tweets_raw['wkt'] = gpd.GeoSeries.from_wkt(tweets_raw.wkt)
# ... and use that column as the active geometry, tagged as EPSG:4326.
tweets_df = gpd.GeoDataFrame(tweets_raw, geometry='wkt', crs=4326)

### Define function for counting tweets in polygons
The function also writes the tweet counts into the geodataframe that contains the geometries.

In [8]:
def count_points(zone):
    """Count the tweets inside one land-use zone and store the result.

    Meant to be applied row-wise, e.g. ``landuse.apply(count_points, axis=1)``.
    Reads the module-level ``tweets_df`` and writes to the module-level
    ``landuse``.

    Parameters
    ----------
    zone : row of ``landuse``
        ``zone.geometry`` is the geometry the tweets are clipped to and
        ``zone.name`` is the index label of the row in ``landuse``.

    Returns
    -------
    None
        The count is written to ``landuse.loc[zone.name, 'counts']``.
    """
    # Keep only the tweets that lie within this zone's geometry.
    tweets_in_zone = gpd.clip(tweets_df, zone.geometry)
    landuse.loc[zone.name, 'counts'] = len(tweets_in_zone)

### Apply function to the geometry dataframe

In [9]:
%%capture
# Call count_points once per land-use row (axis=1). The call is made for its
# side effect of filling landuse['counts']; count_points returns nothing, so
# %%capture hides the otherwise meaningless cell output.
landuse.apply(count_points, axis=1)

In [10]:
# Share of all loaded tweets that fall into each land-use class.
total_tweets = len(tweets_df)
# Sum the counts of every row that belongs to a class. The previous
# `.counts.values[0]` used only the first row of each class, which undercounts
# any class that is split across several rows. With one row per class the
# result is unchanged. sort=False keeps the classes in their order of first
# appearance, matching the original iteration order.
counts_per_class = landuse.groupby('lu_transl', sort=False)['counts'].sum()
stats = {
    f'rel_tweets_in_{cls}': class_count / total_tweets
    for cls, class_count in counts_per_class.items()
}

In [11]:
# The statistics table is addressed by the window start as an integer.
# NOTE(review): this rebinds `start` from str to int, so re-running the tweet
# loading cell afterwards would pass an int to load_and_subset - confirm that
# is acceptable or use a separate name.
start = int(start)
# Write every statistic into the row for this window; .loc creates the row
# and any missing columns on assignment.
for column, share in stats.items():
    file.loc[start, column] = share

# Persist the updated table back to the CSV it was read from.
file.to_csv(f'data/{ref}')

In [12]:
# Show each statistic rounded to four decimal places.
for label, share in stats.items():
    print(label, ":", round(share, 4))

rel_tweets_in_agricultural_areas : 0.0
rel_tweets_in_trade_and_service_areas : 0.0466
rel_tweets_in_water_bodies : 0.0
rel_tweets_in_mineral_exploration_areas : 0.0
rel_tweets_in_transport_areas : 0.0087
rel_tweets_in_industrial_areas : 0.0029
rel_tweets_in_rocky_outcrops_and_sedimentary_deposits : 0.0233
rel_tweets_in_leisure_areas : 0.0292
rel_tweets_in_institutional_and_public_infrastructure_areas : 0.0146
rel_tweets_in_residential_areas : 0.6676
rel_tweets_in_unbuilt_areas : 0.0029
rel_tweets_in_education_and_health_areas : 0.0058
rel_tweets_in_tree_and_shrub_cover : 0.0058
rel_tweets_in_woody_grass_cover : 0.0029
rel_tweets_in_favela : 0.1895
rel_tweets_in_Areas_subject_to_flooding : 0.0
