# Data Visualisation and Forecasting
### London's Airbnb and Rental Data


In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from pathlib import Path
import matplotlib.pylab as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.colors import n_colors
import plotly.express as px
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

%matplotlib inline
sns.set_context("paper", font_scale = 2)
sns.axes_style({ 'xtick.direction': 'out', 'ytick.direction': 'out',})
sns.set_style("darkgrid")



In [11]:
pip install tensorflow

Defaulting to user installation because normal site-packages is not writeable
Collecting tensorflow
  Downloading tensorflow-2.15.0-cp39-cp39-win_amd64.whl (2.1 kB)
Collecting tensorflow-intel==2.15.0
  Downloading tensorflow_intel-2.15.0-cp39-cp39-win_amd64.whl (300.8 MB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1
  Downloading tensorflow_io_gcs_filesystem-0.31.0-cp39-cp39-win_amd64.whl (1.5 MB)
Collecting tensorflow-estimator<2.16,>=2.15.0
  Downloading tensorflow_estimator-2.15.0-py2.py3-none-any.whl (441 kB)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1
  Downloading gast-0.5.4-py3-none-any.whl (19 kB)
Collecting keras<2.16,>=2.15.0
  Downloading keras-2.15.0-py3-none-any.whl (1.7 MB)
Collecting astunparse>=1.6.0
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting numpy<2.0.0,>=1.23.5
  Downloading numpy-1.26.2-cp39-cp39-win_amd64.whl (15.8 MB)
Collecting ml-dtypes~=0.2.0
  Downloading ml_dtypes-0.2.0-cp39-cp39-win_amd64.whl (938 kB)
Collecting flatbuffers>

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
daal4py 2021.5.0 requires daal==2021.4.0, which is not installed.
scipy 1.7.3 requires numpy<1.23.0,>=1.16.5, but you have numpy 1.26.2 which is incompatible.
numba 0.55.1 requires numpy<1.22,>=1.18, but you have numpy 1.26.2 which is incompatible.
google-cloud-storage 1.31.0 requires google-auth<2.0dev,>=1.11.0, but you have google-auth 2.25.2 which is incompatible.
google-cloud-core 1.7.1 requires google-auth<2.0dev,>=1.24.0, but you have google-auth 2.25.2 which is incompatible.
google-api-core 1.25.1 requires google-auth<2.0dev,>=1.21.1, but you have google-auth 2.25.2 which is incompatible.


#### Load data




In [7]:
# Locations of the two source CSV files (Airbnb listings and rental prices)
Airbnb_path = 'm:/pc/desktop/Project/Airbnb_Listing.csv'
Rental_path = 'm:/pc/desktop/Project/Rental_Price.csv'

# Read each CSV into its own pandas DataFrame, in the order listed
Airbnb_df, Rental_df = map(pd.read_csv, (Airbnb_path, Rental_path))


In [3]:
# Location of the GeoJSON file holding the neighbourhood geometries
geojson_path = 'm:/pc/desktop/Project/neighbourhoods.geojson'
# Read it with GeoPandas so the geometry column is parsed into a GeoDataFrame
neighbourhoods_gdf = gpd.read_file(geojson_path)

In [10]:
# Preview the first few rows of the Airbnb table. Leaving the frame as the
# cell's last expression uses the notebook's rich table display instead of
# the plain-text rendering that print() produces.
Airbnb_df.head()

                Borough Bedroom.Category  Count_of_listing   Mean  \
0  Barking and Dagenham        1 Bedroom                50   67.0   
1  Barking and Dagenham       2 Bedrooms                20  134.0   
2  Barking and Dagenham       3 Bedrooms                 0  139.0   
3  Barking and Dagenham           Studio                 0   90.0   
4                Barnet        1 Bedroom               160   79.0   

   Lower_quartile  Median  Upper_quartile  Annual_OCC  
0            39.0    56.0            94.0   26.889873  
1           102.0   125.0           180.0   23.067101  
2           124.0   152.0           152.0   22.270553  
3            90.0    90.0            90.0   10.655738  
4            47.0    68.0            96.0   28.392099  


In [8]:
# Preview the first few rows of the rental table. Leaving the frame as the
# cell's last expression uses the notebook's rich table display instead of
# the plain-text rendering that print() produces.
Rental_df.head()

                Borough Bedroom.Category  Count.of.rents  Mean  \
0  Barking and Dagenham           Studio              10   779   
1  Barking and Dagenham        1 Bedroom             200  1046   
2  Barking and Dagenham       2 Bedrooms             340  1266   
3  Barking and Dagenham       3 Bedrooms             240  1505   
4                Barnet           Studio             110   962   

   Lower.quartile  Median  Upper.quartile  
0             750     750             850  
1             950    1050            1100  
2            1200    1250            1350  
3            1400    1500            1600  
4             850     950            1050  


In [11]:
# Summarise column dtypes and non-null counts for the Airbnb table.
# In the recorded run, Mean, Lower_quartile, Median and Upper_quartile each
# show 125 non-null values out of 126 rows, i.e. one missing value apiece.
Airbnb_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126 entries, 0 to 125
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Borough           126 non-null    object 
 1   Bedroom.Category  126 non-null    object 
 2   Count_of_listing  126 non-null    int64  
 3   Mean              125 non-null    float64
 4   Lower_quartile    125 non-null    float64
 5   Median            125 non-null    float64
 6   Upper_quartile    125 non-null    float64
 7   Annual_OCC        126 non-null    float64
dtypes: float64(5), int64(1), object(2)
memory usage: 8.0+ KB


In [9]:
# Summarise column dtypes and non-null counts for the rental table.
# In the recorded run every column is fully populated (131 non-null of 131).
Rental_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 131 entries, 0 to 130
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Borough           131 non-null    object
 1   Bedroom.Category  131 non-null    object
 2   Count.of.rents    131 non-null    int64 
 3   Mean              131 non-null    int64 
 4   Lower.quartile    131 non-null    int64 
 5   Median            131 non-null    int64 
 6   Upper.quartile    131 non-null    int64 
dtypes: int64(5), object(2)
memory usage: 7.3+ KB


In [4]:
# Summarise column dtypes and non-null counts for the neighbourhood geometries.
# In the recorded run `neighbourhood_group` has 0 non-null values out of 33 rows.
neighbourhoods_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   neighbourhood        33 non-null     object  
 1   neighbourhood_group  0 non-null      object  
 2   geometry             33 non-null     geometry
dtypes: geometry(1), object(2)
memory usage: 920.0+ bytes


`neighbourhood_group` contains only NaNs, so we can drop that column. Let's also check whether any other columns are entirely NaN.

In [5]:
def get_nan_columns(df: pd.DataFrame) -> list:
    """
        List the columns of a dataframe that hold no data at all.

        Parameters
        ----------
        df : DataFrame whose columns are inspected.

        Returns
        -------
        list
            Column labels, in the dataframe's column order, for which every
            value is NaN/missing. Empty when no such column exists.
    """
    return [label for label in df.columns if df[label].isna().all()]

In [12]:
# Names of the neighbourhood columns that contain nothing but NaN
neighbourhood_nans = get_nan_columns(neighbourhoods_gdf)
print(neighbourhood_nans)

['neighbourhood_group']


In [13]:
# Remove the all-NaN columns and index the frame by neighbourhood name.
# Written without inplace mutation and guarded so that re-running the cell is
# harmless: the original raised KeyError on a second run because the columns
# had already been dropped / moved into the index.
neighbourhoods_gdf = neighbourhoods_gdf.drop(columns=neighbourhood_nans, errors="ignore")
if "neighbourhood" in neighbourhoods_gdf.columns:
    neighbourhoods_gdf = neighbourhoods_gdf.set_index("neighbourhood")

Add per-neighbourhood price statistics to `neighbourhoods_gdf`.

In [None]:
# NOTE(review): `listings_df` is not defined in any cell visible above this
# one — confirm where it is loaded before relying on this cell.
# Group the listing prices by neighbourhood once and reuse the grouping for
# each statistic; results align with neighbourhoods_gdf on its index.
price_by_neighbourhood = listings_df.groupby("neighbourhood_cleansed")["price_numeric"]

neighbourhoods_gdf["price_mean"] = price_by_neighbourhood.mean()
neighbourhoods_gdf["price_std"] = price_by_neighbourhood.std()
neighbourhoods_gdf["price_median"] = price_by_neighbourhood.median()
neighbourhoods_gdf["price_mode"] = price_by_neighbourhood.agg(pd.Series.mode)
neighbourhoods_gdf

In [None]:
# Store each neighbourhood's centroid in a "centre" column.
# Centroids computed directly on longitude/latitude coordinates are inaccurate
# (GeoPandas warns about this), so when the geometries are in a geographic CRS
# they are projected to British National Grid (EPSG:27700, metres) for the
# calculation and the resulting points are converted back to the original CRS.
# NOTE(review): assumes the boundaries are London/UK data, as the notebook
# title suggests — confirm EPSG:27700 is the appropriate projection.
boundaries = neighbourhoods_gdf["geometry"]
if boundaries.crs is not None and boundaries.crs.is_geographic:
    centres = boundaries.to_crs(epsg=27700).centroid.to_crs(boundaries.crs)
else:
    # Already projected (or CRS unknown): compute the centroid as-is.
    centres = boundaries.centroid
neighbourhoods_gdf["centre"] = centres

Plot the mean price