# Table of Contents
- Circuits
- Drivers
- Constructors
      
## TOC:
* [Import Libraries](#Import-Libraries)
* [Analysis of Circuits](#ana-bullet)

# Import Libraries

In [24]:
import os
import warnings
import numpy as np
import pandas as pd
warnings.filterwarnings('ignore')

# Data visualisation libraries
import plotly.offline as py # Plotly
from plotly import tools
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected = True)  

import folium # used for visualizing geospatial data                                      
import seaborn as sns
import matplotlib.pyplot as plt # Plotting graphs
from matplotlib import rcParams # used for validating 
from folium.plugins import MarkerCluster # viisualising clusters

%matplotlib inline

In [27]:
import random
def random_colors(number_of_colors):
    """Return a list of random hex colour strings such as "#1A2B3C".

    Args:
        number_of_colors: how many colour strings to generate.

    Returns:
        A list with ``number_of_colors`` entries, each a "#" followed by six
        characters drawn at random from the uppercase hex digits.
    """
    hex_digits = '0123456789ABCDEF'
    palette = []
    for _ in range(number_of_colors):
        # Six independent draws give the RRGGBB part of one colour.
        digits = [random.choice(hex_digits) for _ in range(6)]
        palette.append("#" + ''.join(digits))
    return palette

# Analysis of Circuits

In [7]:
# Load the circuits table; the file is decoded as ISO-8859-1 (Latin-1).
# NOTE(review): the preview of this frame shows a garbled location
# ('MontmelÌ_'), so the file may not really be Latin-1 or may be corrupted
# upstream — confirm.
circuits = pd.read_csv('./race-data/circuits.csv', encoding='ISO-8859-1')

Exploring the features of this data

In [9]:
# List the column names of the circuits table.
print(circuits.columns.values)

['circuitId' 'circuitRef' 'name' 'location' 'country' 'lat' 'lng' 'alt'
 'url']


In [10]:
# Preview the first rows to see what each column looks like.
circuits.head()

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt,url
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10.0,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,,http://en.wikipedia.org/wiki/Sepang_Internatio...
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,,http://en.wikipedia.org/wiki/Bahrain_Internati...
3,4,catalunya,Circuit de Barcelona-Catalunya,MontmelÌ_,Spain,41.57,2.26111,,http://en.wikipedia.org/wiki/Circuit_de_Barcel...
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,,http://en.wikipedia.org/wiki/Istanbul_Park


Are there any features with null or missing values?
They will need to be corrected.

In [11]:
# Count the missing values in each column.
circuits.isna().sum()

circuitId      0
circuitRef     0
name           0
location       0
country        0
lat            0
lng            0
alt           72
url            0
dtype: int64

Since `alt` is null in 72 of the 73 rows, we won't take this feature into consideration during our research; it will be dropped.

In [14]:
# Drop the almost entirely empty `alt` column (72 of its 73 values are null).
# errors='ignore' keeps this cell idempotent: re-running it once `alt` is
# already gone is a no-op instead of raising KeyError.
circuits = circuits.drop(columns = ['alt'], errors = 'ignore')

In [16]:
# Check the dtype and non-null count of every remaining column.
circuits.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73 entries, 0 to 72
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   circuitId   73 non-null     int64  
 1   circuitRef  73 non-null     object 
 2   name        73 non-null     object 
 3   location    73 non-null     object 
 4   country     73 non-null     object 
 5   lat         73 non-null     float64
 6   lng         73 non-null     float64
 7   url         73 non-null     object 
dtypes: float64(2), int64(1), object(5)
memory usage: 4.7+ KB


### Visualising circuits around the world on a world map using clusters

This graphical representation is useful for seeing how many racetracks there are on each continent.

In [19]:
# Subset of the circuits table for the map section: name, place and coordinates.
circuits_folium = circuits.loc[:, ['name', 'location', 'country', 'lat', 'lng']]
circuits_folium.head()

Unnamed: 0,name,location,country,lat,lng
0,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968
1,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738
2,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106
3,Circuit de Barcelona-Catalunya,MontmelÌ_,Spain,41.57,2.26111
4,Istanbul Park,Istanbul,Turkey,40.9517,29.405


In [21]:
# Build a world map and add one clustered circle marker per circuit.
circuits_map = folium.Map(tiles="cartodbpositron")
marker_cluster = MarkerCluster().add_to(circuits_map)

# Every marker uses the same radius, so set it once outside the loop.
radius = 5

# Walk the three needed columns in lockstep instead of calling
# `.iloc[i][col]` three times per row, which builds a whole row Series on
# every single access.
for lat, lng, popup_text in zip(circuits_folium['lat'],
                                circuits_folium['lng'],
                                circuits_folium['name']):
    # The popup shows the circuit name when a marker is clicked.
    folium.CircleMarker(location = [lat, lng], radius = radius,
                        popup = popup_text, fill = True).add_to(marker_cluster)

# Last expression of the cell: renders the interactive map.
circuits_map

### Top 10 countries by number of circuits

In [82]:
# Number of circuits per country, largest first.
# After count(), the `name` column holds the number of circuits.
country = (
    circuits[['country', 'name']]
    .groupby(['country'], as_index = False)
    .count()
    .sort_values(by = 'name', ascending = False)
)

In [96]:
# Keep only the first ten rows, i.e. the ten countries with the most circuits.
country = country[['country', 'name']].head(10)
# Show the resulting (rows, columns) as a sanity check.
country.shape

(10, 2)

In [103]:
# Bar chart of the countries with the most circuits.
# One random colour per bar: the palette is sized from the data instead of a
# hard-coded 100, so it always matches the number of bars.
data = [go.Bar(x = country['country'],
               y = country['name'],  # `name` holds the circuit count per country
               marker = dict(color = random_colors(len(country))))]

# Axis titles let the figure stand on its own when the notebook is skimmed.
layout = dict(title = "Countries with Most F1 Circuits",
              xaxis = dict(title = "Country"),
              yaxis = dict(title = "Number of circuits"))
fig = dict(data = data, layout = layout)
# `fig` is a plain dict; validate = False asks iplot not to validate its keys.
iplot(fig, validate = False)

### Most hosted Grand Prix

In [31]:
# Load the races table and preview its first rows.
races = pd.read_csv('./race-data/races.csv')
races.head()

Unnamed: 0,raceId,year,round,circuitId,name,date,time,url
0,1,2009,1,1,Australian Grand Prix,2009-03-29,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_G...
1,2,2009,2,2,Malaysian Grand Prix,2009-04-05,09:00:00,http://en.wikipedia.org/wiki/2009_Malaysian_Gr...
2,3,2009,3,17,Chinese Grand Prix,2009-04-19,07:00:00,http://en.wikipedia.org/wiki/2009_Chinese_Gran...
3,4,2009,4,3,Bahrain Grand Prix,2009-04-26,12:00:00,http://en.wikipedia.org/wiki/2009_Bahrain_Gran...
4,5,2009,5,4,Spanish Grand Prix,2009-05-10,12:00:00,http://en.wikipedia.org/wiki/2009_Spanish_Gran...


In [42]:
# Count how many races were held under each Grand Prix name.
# NOTE: despite its name, `win_count` holds hosting counts, not wins.
win_count = races['name'].value_counts()

# One random colour per bar: sizing the palette from the data (instead of a
# fixed 100) guarantees there is a colour for every Grand Prix name.
data = [go.Bar(x = win_count.index,
               y = win_count.values,
               marker = dict(color = random_colors(len(win_count))))]

# Axis titles let the figure stand on its own when the notebook is skimmed.
layout = dict(title = "No of Most Hosted Grand Prix (Upto 2017)",
              xaxis = dict(title = "Grand Prix"),
              yaxis = dict(title = "Number of races hosted"))
fig = dict(data = data, layout = layout)
# `fig` is a plain dict; validate = False asks iplot not to validate its keys.
iplot(fig, validate = False)

### What are the circuits' climate zones?

In [80]:
# One climate-zone code per circuit (73 entries). The assignment to
# `circuits` below is positional, so this list must stay in the same row
# order as the `circuits` frame.
# NOTE(review): the codes look like abbreviations (presumably 'mwc' = marine
# west coast, 'humsub' = humid subtropical, 'med' = mediterranean, ...) —
# confirm against the source of this classification.
climate_zone = ['mwc', 'troprain', 'des', 'mwc', 'mwc', 'med', 'hcont', 'mwc', 'mwc', 'mwc', 'mwc', 'med', 'mwc', 'mwc', 'troprain', 'humsub', 'humsub', 'tropsav', 'hcont', 'mwc', 'mwc', 'humsub', 'mwc', 'des', 'humsub', 'med', 'med', 'humsub', 'steppe', 'humsub', 'mwc', 'humsub', 'des', 'med', 'humsub', 'humsub', 'humsub', 'mwc', 'mwc', 'mwc', 'mwc', 'humsub', 'steppe', 'humsub', 'med', 'hcont', 'mwc', 'sarc', 'mwc', 'mwc', 'mwc', 'hcont', 'mwc', 'mwc', 'mwc', 'mwc', 'hcont', 'mwc', 'med', 'steppe', 'mwc', 'med', 'humsub', 'med', 'mwc', 'mwc', 'mwc', 'humsub', 'humsub', 'hcont', 'mwc', 'humsub', 'steppe']
# Adds (or overwrites) the `climate_zone` column on the shared `circuits` frame.
circuits['climate_zone'] = climate_zone
# Number of circuits per climate zone, largest first; after count() the
# `name` column holds the count.
circuits_climate = circuits[['climate_zone','name']].groupby(['climate_zone'], as_index = False).count().sort_values(by='name', ascending = False)

# Show the resulting (rows, columns): one row per distinct zone code.
circuits_climate.shape

(9, 2)

In [93]:
# Human-readable label for every climate-zone code.
# Labels are attached by code rather than by row position: zones with equal
# counts ('sarc' and 'tropsav' both have one circuit) are not guaranteed to
# come out of sort_values() in a fixed order, so a positional list of labels
# could silently swap their names.
climate_names = {'mwc': 'Marine West Coast',
                 'humsub': 'Humid Subtropical',
                 'med': 'Mediterranean',
                 'hcont': 'Humid Continental',
                 'steppe': 'Steppe',
                 'des': 'Desert',   # was misspelled 'Dessert' in the chart legend
                 'troprain': 'Tropical Rain',
                 'sarc': 'Polar',
                 'tropsav': 'Tropical Sav'}

# Kept in its original {'Name': [...]} shape for any cell that still reads it.
climate_label = {'Name': list(climate_names.values())}

# Unknown values (including labels that were already translated) pass through
# unchanged, so this cell can safely be re-run.
circuits_climate['climate_zone'] = circuits_climate['climate_zone'].map(
    lambda code: climate_names.get(code, code))

# Donut chart: share of circuits per climate zone.
trace = go.Pie(labels = circuits_climate['climate_zone'],
               values = circuits_climate['name'],  # `name` holds the circuit count
               hole = 0.6, textinfo = 'none')

layout = go.Layout(title='Climate Zones with F1 Circuits')
fig = go.Figure(data = [trace], layout = layout)
iplot(fig, filename = "plotting-library")

# Analysis of Drivers