# Requirement:

>The name of the team

>The total number of goals scored by the team during the 2011 season

>The total number of wins the team earned during the 2011 season

>A histogram visualization of the team's wins and losses for the 2011 season (store the visualization directly)

>The team's win percentage on days where it was raining during games in the 2011 season.


# T1 = Matches: 

>Match_ID (int): unique ID per match

>Div (str): identifies the division the match was played in (D1 = Bundesliga, D2 = Bundesliga 2, E0 = English 

>Premier League)

>Season (int): Season the match took place in (usually covering the period of August till May of the following year)

>Date (str): Date of the match

>HomeTeam (str): Name of the home team

>AwayTeam (str): Name of the away team

>FTHG (int) (Full Time Home Goals): Number of goals scored by the home team

>FTAG (int) (Full Time Away Goals): Number of goals scored by the away team

>FTR (str) (Full Time Result): 3-way result of the match (H = Home Win, D = Draw, A = Away Win)

# T2 = Teams

>Season (str): Football season for which the data is valid

>TeamName (str): Name of the team the data concerns

>KaderHome (str): Number of Players in the squad

>AvgAgeHome (str): Average age of players

>ForeignPlayersHome (str): Number of foreign players (non-German, non-English respectively) playing for the team

>OverallMarketValueHome (str): Overall market value of the team pre-season in EUR (based on data from transfermarkt.de)

>AvgMarketValueHome (str): Average market value (per player) of the team pre-season in EUR (based on data from transfermarkt.de)

>StadiumCapacity (str): Maximum stadium capacity of the team's home stadium

# T3 = Unique Teams

> TeamName (str): Name of a team

> Unique_Team_ID (int): Unique identifier for each team

# T4 = Teams_in_Matches

>Match_ID (int): Unique match ID

>Unique_Team_ID (int): Unique team ID (This table is used to easily retrieve each match a given team has played in)

In [165]:
#This is where we import the relevant packages to be used. 
#SQL to organise our data / Pandas & Numpy for the second stage of anlaysis.

import sqlite3 
import pandas as pd
import numpy as np
import requests
import json

In [166]:
conn = sqlite3.connect('database.sqlite')
cur = conn.cursor()

In [167]:
cur.execute("""SELECT * FROM Matches LIMIT 10""")

<sqlite3.Cursor at 0x18e716730>

In [168]:
df = pd.DataFrame(cur.fetchall())
df.columns = [x[0] for x in cur.description]
df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,1,D2,2009,2010-04-04,Oberhausen,Kaiserslautern,2,1,H
1,2,D2,2009,2009-11-01,Munich 1860,Kaiserslautern,0,1,A
2,3,D2,2009,2009-10-04,Frankfurt FSV,Kaiserslautern,1,1,D
3,4,D2,2009,2010-02-21,Frankfurt FSV,Karlsruhe,2,1,H
4,5,D2,2009,2009-12-06,Ahlen,Karlsruhe,1,3,A


In [169]:
cur.execute("""SELECT * FROM Teams LIMIT 10""")
df1 = pd.DataFrame(cur.fetchall())
df1.columns = [x[0] for x in cur.description]
df1.head()

Unnamed: 0,Season,TeamName,KaderHome,AvgAgeHome,ForeignPlayersHome,OverallMarketValueHome,AvgMarketValueHome,StadiumCapacity
0,2017,Bayern Munich,27,26,15,597950000,22150000,75000
1,2017,Dortmund,33,25,18,416730000,12630000,81359
2,2017,Leverkusen,31,24,15,222600000,7180000,30210
3,2017,RB Leipzig,30,23,15,180130000,6000000,42959
4,2017,Schalke 04,29,24,17,179550000,6190000,62271


In [170]:
cur.execute("""SELECT * FROM Teams_in_Matches LIMIT 10""")
df2 = pd.DataFrame(cur.fetchall())
df2.columns = [x[0] for x in cur.description]
df2.head()

Unnamed: 0,Match_ID,Unique_Team_ID
0,1,26
1,1,46
2,2,26
3,2,42
4,3,26


In [171]:
cur.execute("""SELECT * FROM Unique_Teams LIMIT 10""")
df3 = pd.DataFrame(cur.fetchall())
df3.columns = [x[0] for x in cur.description]
df3.head()

Unnamed: 0,TeamName,Unique_Team_ID
0,Bayern Munich,1
1,Dortmund,2
2,Leverkusen,3
3,RB Leipzig,4
4,Schalke 04,5


In [172]:
#STEP 1 : JOIN T3 & T4 then JOIN T1

cur.execute("""SELECT * FROM Teams_in_Matches JOIN Unique_Teams USING (Unique_Team_ID) JOIN Matches 
               USING (Match_ID) WHERE Season = 2011""")
pdset = pd.DataFrame(cur.fetchall())
pdset.columns = [x[0] for x in cur.description]
pdset.head()

Unnamed: 0,Match_ID,Unique_Team_ID,TeamName,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,1092,1,Bayern Munich,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A
1,1092,22,Nurnberg,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A
2,1093,1,Bayern Munich,D1,2011,2011-12-11,Stuttgart,Bayern Munich,1,2,A
3,1093,17,Stuttgart,D1,2011,2011-12-11,Stuttgart,Bayern Munich,1,2,A
4,1094,1,Bayern Munich,D1,2011,2011-08-13,Wolfsburg,Bayern Munich,0,1,A


In [173]:
#Adding result of the TeamName (we'll refer to TeamName as Main Team)

for i, row in pdset.iterrows():
    if row.TeamName == row.AwayTeam:
        # Away
        if row.FTR == 'A':
            pdset.loc[i,'Result'] = "W"
        elif row.FTR == 'D':
            pdset.loc[i,'Result'] = "D"
        else:
            pdset.loc[i,'Result'] = "L"
    else:
        # Home
        if row.FTR == 'H':
            pdset.loc[i,'Result'] = "W"
        elif row.FTR == 'D':
            pdset.loc[i,'Result'] = "D"
        else:
            pdset.loc[i,'Result'] = "L"

#Adding number of goals for the main team 
            
pdset['goals_scored'] = np.where(pdset['TeamName'] == pdset ['AwayTeam'], pdset['FTAG'], pdset['FTHG'])
pdset.head()


Unnamed: 0,Match_ID,Unique_Team_ID,TeamName,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Result,goals_scored
0,1092,1,Bayern Munich,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A,W,1
1,1092,22,Nurnberg,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A,L,0
2,1093,1,Bayern Munich,D1,2011,2011-12-11,Stuttgart,Bayern Munich,1,2,A,W,2
3,1093,17,Stuttgart,D1,2011,2011-12-11,Stuttgart,Bayern Munich,1,2,A,L,1
4,1094,1,Bayern Munich,D1,2011,2011-08-13,Wolfsburg,Bayern Munich,0,1,A,W,1


In [174]:
def get_keys(path):
    with open(path) as f:
        return json.load(f)

In [175]:
#API Exploration

keys = get_keys("/Users/augustingoudet/.secret/code_file")
api_key = keys['dark_sky_key']

latitude = 52.5200
longitude = 13.4050
time = 1577836800

url = "https://api.darksky.net/forecast/{}/{},{}".format(api_key, latitude, longitude, time)

dark_sky = requests.get(url)
print(dark_sky.text)


{"latitude":52.52,"longitude":13.405,"timezone":"Europe/Berlin","currently":{"time":1580319320,"summary":"Possible Drizzle","icon":"rain","precipIntensity":0.0097,"precipProbability":0.33,"precipType":"rain","temperature":39.19,"apparentTemperature":30.41,"dewPoint":35.3,"humidity":0.86,"pressure":1004.1,"windSpeed":16.31,"windGust":29.16,"windBearing":265,"cloudCover":0.98,"uvIndex":0,"visibility":10,"ozone":390.2},"hourly":{"summary":"Light rain starting tonight.","icon":"rain","data":[{"time":1580317200,"summary":"Possible Drizzle","icon":"rain","precipIntensity":0.0087,"precipProbability":0.35,"precipType":"rain","temperature":39.2,"apparentTemperature":30.34,"dewPoint":35.13,"humidity":0.85,"pressure":1003.5,"windSpeed":16.56,"windGust":29.77,"windBearing":268,"cloudCover":0.98,"uvIndex":0,"visibility":10,"ozone":390.3},{"time":1580320800,"summary":"Possible Drizzle","icon":"rain","precipIntensity":0.0096,"precipProbability":0.3,"precipType":"rain","temperature":39.22,"apparentTem

In [176]:
print(dark_sky.headers['Date'])

Wed, 29 Jan 2020 17:35:20 GMT


In [194]:
response = dark_sky.json().keys()
response

dict_keys(['latitude', 'longitude', 'timezone', 'currently', 'hourly', 'daily', 'flags', 'offset'])

In [202]:
response.json()['currently'][:2]

AttributeError: 'dict_keys' object has no attribute 'json'

In [179]:
# You don't have to use these classes, but we recommend them as a good place to start!
class WeatherGetter():
    
    pass

In [180]:
class MongoHandler():
    pass
