In [1]:
import pandas as pd
import numpy as np

In [2]:
CR7 = pd.read_csv("CR7data.csv")
CR7_pen = pd.read_csv("CR7penalties.csv")
# importing Cristiano Ronaldo club goals & Penalty goals data from .csv files from Kaggle.

In [3]:
CR7.head()
# show first few rows of the dataset

Unnamed: 0.1,Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal
0,0,1,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,LW,34,2:0,Solo run
1,1,2,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,,90+5,3:0,Header
2,2,3,2/3,Liga Portugal,8,A,Sporting CP,Boavista FC,1:2,,88,1:2,Right-footed shot
3,3,4,2/3,TaÃ§a de Portugal Placard,Fourth Round,H,Sporting CP,CD Estarreja,4:1,,67,3:0,Left-footed shot
4,4,5,2/3,TaÃ§a de Portugal Placard,Fifth Round,H,Sporting CP,FC Oliveira do Hospital,8:1,,13,3:0,


In [4]:
CR7.info()
# return the information types of each column

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 698 entries, 0 to 697
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Unnamed: 0    698 non-null    int64 
 1   Goal_no       698 non-null    int64 
 2   Season        698 non-null    object
 3   Competition   698 non-null    object
 4   Matchday      698 non-null    object
 5   Venue         698 non-null    object
 6   Team          698 non-null    object
 7   Opponent      698 non-null    object
 8   Result        698 non-null    object
 9   Position      640 non-null    object
 10  Minute        698 non-null    object
 11  At_score      698 non-null    object
 12  Type_of_goal  683 non-null    object
dtypes: int64(2), object(11)
memory usage: 71.0+ KB


In [5]:
# so it's a dataframe with 698 rows & 13 columns
# two columns have missing values, so hopefully only minor cleaning required
missing_values_count = CR7.isnull().sum()
print(missing_values_count)
# find the null values & sum them

Unnamed: 0       0
Goal_no          0
Season           0
Competition      0
Matchday         0
Venue            0
Team             0
Opponent         0
Result           0
Position        58
Minute           0
At_score         0
Type_of_goal    15
dtype: int64


In [6]:
def size_NA(df, name):                 # Here we take in the dataframe & column name, represented by "df" & "name"
    missing_values = df[name].isnull().sum()  # assign the number of null cells in the column to missing_values
    total_rows = df.shape[0]                 # assign the number of rows in the dataframe to total_rows
    percentNA = (missing_values / total_rows) * 100 # assign the % of rows in the df that contain null values to percentNA
    return percentNA       # And now we return this function as the variable "percentNA"

In [7]:
size_NA(CR7,'Position')

8.30945558739255

In [8]:
# based on this I wouldn't loose a significant amount of data by doing a drop NA.
droprows= CR7.dropna() # set the data with NAs dropped (rows) as droprows
print(CR7.shape,droprows.shape) # print the shape of original data, prin the shape of the data with NA rows dropped

(698, 13) (634, 13)


In [9]:
698-634

64

In [10]:
# missing 64 goals if I drop the NAs
# think I'll fill them instead
# I set my fillna function to fill NAs in the column 'Position' with "Forward" & 'Type_of_goal' with Body
# print the data frame
CR7.fillna({'Position':'Forward', 'Type_of_goal':'Body'}, inplace=True)
CR7

Unnamed: 0.1,Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal
0,0,1,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,LW,34,2:0,Solo run
1,1,2,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,Forward,90+5,3:0,Header
2,2,3,2/3,Liga Portugal,8,A,Sporting CP,Boavista FC,1:2,Forward,88,1:2,Right-footed shot
3,3,4,2/3,TaÃ§a de Portugal Placard,Fourth Round,H,Sporting CP,CD Estarreja,4:1,Forward,67,3:0,Left-footed shot
4,4,5,2/3,TaÃ§a de Portugal Placard,Fifth Round,H,Sporting CP,FC Oliveira do Hospital,8:1,Forward,13,3:0,Body
...,...,...,...,...,...,...,...,...,...,...,...,...,...
693,693,694,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,32,2:0,Header
694,694,695,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,76,3:2,Direct free kick
695,695,696,21/22,Premier League,34,A,Manchester United,Arsenal FC,3:1,CF,34,2:1,Left-footed shot
696,696,697,21/22,Premier League,37,H,Manchester United,Chelsea FC,1:1,CF,62,1:1,Right-footed shot


In [11]:
# now I have no duplicates & no NAs in my data

In [12]:
CR7['# of Goals']=1
# add a column of 1s into the DF so I can perform sum, mean,etc. calculations on the data
CR7

Unnamed: 0.1,Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal,# of Goals
0,0,1,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,LW,34,2:0,Solo run,1
1,1,2,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,Forward,90+5,3:0,Header,1
2,2,3,2/3,Liga Portugal,8,A,Sporting CP,Boavista FC,1:2,Forward,88,1:2,Right-footed shot,1
3,3,4,2/3,TaÃ§a de Portugal Placard,Fourth Round,H,Sporting CP,CD Estarreja,4:1,Forward,67,3:0,Left-footed shot,1
4,4,5,2/3,TaÃ§a de Portugal Placard,Fifth Round,H,Sporting CP,FC Oliveira do Hospital,8:1,Forward,13,3:0,Body,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
693,693,694,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,32,2:0,Header,1
694,694,695,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,76,3:2,Direct free kick,1
695,695,696,21/22,Premier League,34,A,Manchester United,Arsenal FC,3:1,CF,34,2:1,Left-footed shot,1
696,696,697,21/22,Premier League,37,H,Manchester United,Chelsea FC,1:1,CF,62,1:1,Right-footed shot,1


In [13]:
CR7.drop('Unnamed: 0', axis=1, inplace=True) # remove the Unnamed column
CR7

Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal,# of Goals
0,1,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,LW,34,2:0,Solo run,1
1,2,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,Forward,90+5,3:0,Header,1
2,3,2/3,Liga Portugal,8,A,Sporting CP,Boavista FC,1:2,Forward,88,1:2,Right-footed shot,1
3,4,2/3,TaÃ§a de Portugal Placard,Fourth Round,H,Sporting CP,CD Estarreja,4:1,Forward,67,3:0,Left-footed shot,1
4,5,2/3,TaÃ§a de Portugal Placard,Fifth Round,H,Sporting CP,FC Oliveira do Hospital,8:1,Forward,13,3:0,Body,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
693,694,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,32,2:0,Header,1
694,695,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,76,3:2,Direct free kick,1
695,696,21/22,Premier League,34,A,Manchester United,Arsenal FC,3:1,CF,34,2:1,Left-footed shot,1
696,697,21/22,Premier League,37,H,Manchester United,Chelsea FC,1:1,CF,62,1:1,Right-footed shot,1


In [14]:
CR7_reverse = CR7.sort_values('Goal_no', ascending = False) # sort data in reverse, ie. newest goal to oldest goal
CR7_reverse

Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal,# of Goals
697,698,21/22,Premier League,35,H,Manchester United,Brentford FC,3:0,CF,61,2:0,Penalty,1
696,697,21/22,Premier League,37,H,Manchester United,Chelsea FC,1:1,CF,62,1:1,Right-footed shot,1
695,696,21/22,Premier League,34,A,Manchester United,Arsenal FC,3:1,CF,34,2:1,Left-footed shot,1
694,695,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,76,3:2,Direct free kick,1
693,694,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,32,2:0,Header,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,5,2/3,TaÃ§a de Portugal Placard,Fifth Round,H,Sporting CP,FC Oliveira do Hospital,8:1,Forward,13,3:0,Body,1
3,4,2/3,TaÃ§a de Portugal Placard,Fourth Round,H,Sporting CP,CD Estarreja,4:1,Forward,67,3:0,Left-footed shot,1
2,3,2/3,Liga Portugal,8,A,Sporting CP,Boavista FC,1:2,Forward,88,1:2,Right-footed shot,1
1,2,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,Forward,90+5,3:0,Header,1


In [15]:
CR7.groupby('Position').mean('# of Goals')

Unnamed: 0_level_0,Goal_no,# of Goals
Position,Unnamed: 1_level_1,Unnamed: 2_level_1
CF,512.459596,1.0
CF,506.0,1.0
Forward,68.137931,1.0
LW,361.278873,1.0
LW,360.2,1.0
RW,88.088608,1.0


In [16]:
CR7.groupby('Position').mean()

Unnamed: 0_level_0,Goal_no,# of Goals
Position,Unnamed: 1_level_1,Unnamed: 2_level_1
CF,512.459596,1.0
CF,506.0,1.0
Forward,68.137931,1.0
LW,361.278873,1.0
LW,360.2,1.0
RW,88.088608,1.0


In [17]:
CR7.groupby('Position')['# of Goals'].sum()

Position
CF         198
CF           3
Forward     58
LW         355
LW           5
RW          79
Name: # of Goals, dtype: int64

In [18]:
print(CR7)

     Goal_no Season                Competition      Matchday Venue  \
0          1    2/3              Liga Portugal             6     H   
1          2    2/3              Liga Portugal             6     H   
2          3    2/3              Liga Portugal             8     A   
3          4    2/3  TaÃ§a de Portugal Placard  Fourth Round     H   
4          5    2/3  TaÃ§a de Portugal Placard   Fifth Round     H   
..       ...    ...                        ...           ...   ...   
693      694  21/22             Premier League            33     H   
694      695  21/22             Premier League            33     H   
695      696  21/22             Premier League            34     A   
696      697  21/22             Premier League            37     H   
697      698  21/22             Premier League            35     H   

                  Team                 Opponent Result Position Minute  \
0          Sporting CP            Moreirense FC    3:0       LW     34   
1          

In [19]:
CR7['Position'] = CR7['Position'].str.replace('CF','CF')
# overwirte instances of CF, trying to stop the duplication in position when we grouby position

In [20]:
CR7.groupby('Position')['# of Goals'].sum()
# print groupby again to see did it work

Position
CF         198
CF           3
Forward     58
LW         355
LW           5
RW          79
Name: # of Goals, dtype: int64

In [21]:
CR7['Position']
# print the list on entries in 'Position'

0           LW
1      Forward
2      Forward
3      Forward
4      Forward
        ...   
693         CF
694         CF
695         CF
696         CF
697         CF
Name: Position, Length: 698, dtype: object

In [36]:
CR7

Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal,# of Goals
0,1,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,LW,34,2:0,Solo run,1
1,2,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,Forward,90+5,3:0,Header,1
2,3,2/3,Liga Portugal,8,A,Sporting CP,Boavista FC,1:2,Forward,88,1:2,Right-footed shot,1
3,4,2/3,TaÃ§a de Portugal Placard,Fourth Round,H,Sporting CP,CD Estarreja,4:1,Forward,67,3:0,Left-footed shot,1
4,5,2/3,TaÃ§a de Portugal Placard,Fifth Round,H,Sporting CP,FC Oliveira do Hospital,8:1,Forward,13,3:0,Body,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
693,694,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,32,2:0,Header,1
694,695,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,76,3:2,Direct free kick,1
695,696,21/22,Premier League,34,A,Manchester United,Arsenal FC,3:1,CF,34,2:1,Left-footed shot,1
696,697,21/22,Premier League,37,H,Manchester United,Chelsea FC,1:1,CF,62,1:1,Right-footed shot,1


In [52]:
Position = {}
Position['Position'] = [d for d in CR7.Position]
print(Position)
# used a for loop to save Position column as a dictionary & print the dictionary
# trying to find the anomalies in the position - ie. why I have 2 CFs and 2 LWs in the groupby.

{'Position': ['LW', 'Forward', 'Forward', 'Forward', 'Forward', 'RW', 'RW', 'Forward', 'Forward', 'Forward', 'RW', 'Forward', 'Forward', 'Forward', 'RW', 'RW', 'Forward', 'Forward', 'Forward', 'RW', 'Forward', 'Forward', 'RW', 'Forward', 'Forward', 'Forward', 'Forward', 'RW', 'RW', 'RW', 'Forward', 'RW', 'Forward', 'CF', 'Forward', 'RW', 'Forward', 'Forward', 'Forward', 'Forward', 'Forward', 'Forward', 'RW', 'RW', 'Forward', 'RW', 'RW', 'Forward', 'RW', 'Forward', 'RW', 'RW', 'Forward', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'Forward', 'Forward', 'Forward', 'RW', 'RW', 'Forward', 'RW', 'Forward', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'Forward', 'Forward', 'Forward', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'LW', 'Forward', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'LW', 'Forward', 'LW', 'LW', 'LW', 'CF', 'Forward', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'RW', 'CF', 'CF', 'RW', 'RW', 'Forward', 'CF', 'CF', 'RW'

In [53]:
CR7['Position'] = CR7['Position'].str.replace('CF ','CF')
CR7['Position'] = CR7['Position'].str.replace('LW ','LW')
# overwirte instances of CF, trying to stop the duplication in position when we grouby position

In [54]:
CR7.groupby('Position')['# of Goals'].sum()
# print groupby again to see did it work

Position
CF         201
Forward     58
LW         360
RW          79
Name: # of Goals, dtype: int64

In [55]:
#Bingo

In [65]:
CR7.groupby('Minute')['# of Goals'].sum()

Minute
1        1
10      10
11       6
12       9
120      1
        ..
90+4     2
90+5     2
90+6     2
90+7     1
97       1
Name: # of Goals, Length: 102, dtype: int64

In [67]:
CR7.groupby(['Position','Minute','# of Goals']).apply(np.sum)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal,# of Goals
Position,Minute,# of Goals,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
CF,10,1,2328,16/1716/1719/2020/21,UEFA Champions LeagueLaLigaSerie ASerie A,Semi-Finals211927,HAAA,Real MadridReal MadridJuventus FCJuventus FC,Atlatico de MadridCelta de VigoAS RomaCagliari...,3:01:41:21:3,CFCFCFCF,10101010,1:00:10:20:1,HeaderLeft-footed shotPenaltyHeader,4
CF,11,1,684,8/917/18,UEFA Champions LeagueLaLiga,Semi-Finals29,AH,Manchester UnitedReal Madrid,Arsenal FCGirona FC,1:36:3,CFCF,1111,0:21:0,Direct free kickLeft-footed shot,2
CF,12,1,1221,17/1821/22,UEFA Champions LeaguePremier League,Group Stage29,HH,Real MadridManchester United,APOEL NicosiaTottenham Hotspur,3:03:2,CFCF,1212,1:01:0,Counter attack goalRight-footed shot,2
CF,13,1,3248,10/1114/1520/2120/2120/2121/22,Copa del ReyLaLigaUEFA Champions LeagueSerie A...,Final8Group Stage213Group Stage,AAAHHA,Real MadridReal MadridJuventus FCJuventus FCJu...,FC BarcelonaLevante UDFC BarcelonaAS RomaSSC N...,0:1 AET0:50:32:02:12:1,CFCFCFCFCFCF,131313131313,0:10:10:11:01:00:1,HeaderPenaltyPenaltyLeft-footed shotRight-foot...,6
CF,14,1,449,15/16,LaLiga,10,H,Real Madrid,UD Las Palmas,3:1,CF,14,2:0,Header,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
RW,87,1,71,7/8,Premier League,18,H,Manchester United,Everton FC,2:1,RW,87,2:1,Penalty,1
RW,88,1,326,7/87/87/88/9,UEFA Champions LeaguePremier LeagueFA CupPremi...,Group Stage22Fourth Round13,HHHH,Manchester UnitedManchester UnitedManchester U...,Dynamo KyivNewcastle UnitedTottenham HotspurSt...,4:06:03:15:0,RWRWRWRW,88888888,4:05:03:15:0,Right-footed shotLeft-footed shotRight-footed ...,4
RW,9,1,92,7/8,Premier League,33,A,Manchester United,Middlesbrough FC,2:2,RW,9,0:1,Left-footed shot,1
RW,90+2,1,78,7/8,Premier League,23,A,Manchester United,Reading FC,0:2,RW,90+2,0:2,Right-footed shot,1


In [68]:
CR7.groupby(['Position','Minute','# of Goals']).apply(np.std)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Goal_no,# of Goals
Position,Minute,# of Goals,Unnamed: 3_level_1,Unnamed: 4_level_1
CF,10,1,61.995968,0.0
CF,11,1,221.000000,0.0
CF,12,1,79.500000,0.0
CF,13,1,182.917711,0.0
CF,14,1,0.000000,0.0
...,...,...,...,...
RW,87,1,0.000000,0.0
RW,88,1,15.532225,0.0
RW,9,1,0.000000,0.0
RW,90+2,1,0.000000,0.0


In [96]:
PositionGoals = pd.DataFrame(CR7.groupby('Position')['# of Goals'].sum())
PositionGoals
# Create a subset dataset of the CR7 goal data (Position) & print it

Unnamed: 0_level_0,# of Goals
Position,Unnamed: 1_level_1
CF,201
Forward,58
LW,360
RW,79


In [86]:
LeagueGoals = pd.DataFrame(CR7.groupby('Competition')['# of Goals'].sum())
LeagueGoals
# Create a subset dataset of the CR7 goal data (Competition) & print it

Unnamed: 0_level_0,# of Goals
Competition,Unnamed: 1_level_1
Copa del Rey,22
EFL Cup,4
FA Cup,13
FIFA Club World Cup,7
Italy Cup,4
LaLiga,311
Liga Portugal,3
Premier League,102
Serie A,81
Supercopa,4


In [92]:
CR7.groupby(['Competition','Team'])['# of Goals'].apply(np.sum)

Competition                       Team             
Copa del Rey                      Real Madrid           22
EFL Cup                           Manchester United      4
FA Cup                            Manchester United     13
FIFA Club World Cup               Manchester United      1
                                  Real Madrid            6
Italy Cup                         Juventus FC            4
LaLiga                            Real Madrid          311
Liga Portugal                     Sporting CP            3
Premier League                    Manchester United    102
Serie A                           Juventus FC           81
Supercopa                         Real Madrid            4
Supercoppa Italiana               Juventus FC            2
TaÃ§a de Portugal Placard         Sporting CP            2
UEFA Champions League             Juventus FC           14
                                  Manchester United     21
                                  Real Madrid          105
UEFA

In [93]:
CompGoals = pd.DataFrame(CR7.groupby(['Competition','Team'])['# of Goals'].apply(np.sum))
CompGoals
# Create a subset dataset of the CR7 goal data (Competition & Team) & print it

Unnamed: 0_level_0,Unnamed: 1_level_0,# of Goals
Competition,Team,Unnamed: 2_level_1
Copa del Rey,Real Madrid,22
EFL Cup,Manchester United,4
FA Cup,Manchester United,13
FIFA Club World Cup,Manchester United,1
FIFA Club World Cup,Real Madrid,6
Italy Cup,Juventus FC,4
LaLiga,Real Madrid,311
Liga Portugal,Sporting CP,3
Premier League,Manchester United,102
Serie A,Juventus FC,81


In [95]:
CompGoals = pd.DataFrame(CR7.groupby(['Competition','Team','Position'])['# of Goals'].apply(np.sum))
CompGoals
# Create a subset dataset of the CR7 goal data (Competition, Team & Position) & print it

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# of Goals
Competition,Team,Position,Unnamed: 3_level_1
Copa del Rey,Real Madrid,CF,4
Copa del Rey,Real Madrid,Forward,1
Copa del Rey,Real Madrid,LW,17
EFL Cup,Manchester United,Forward,2
EFL Cup,Manchester United,RW,2
FA Cup,Manchester United,CF,1
FA Cup,Manchester United,Forward,6
FA Cup,Manchester United,RW,6
FIFA Club World Cup,Manchester United,LW,1
FIFA Club World Cup,Real Madrid,LW,6


In [170]:
CR7.set_index('Goal_no')
MinuteGoals = pd.DataFrame(CR7.groupby(['Minute'])['# of Goals'].apply(np.sum)).reset_index()
MinuteGoals
# Create a subset dataset of the CR7 goal data (Minute) & print it

Unnamed: 0,Minute,# of Goals
0,1,1
1,10,10
2,11,6
3,12,9
4,120,1
...,...,...
97,90+4,2
98,90+5,2
99,90+6,2
100,90+7,1


In [181]:
FirstHalf = MinuteGoals[MinuteGoals['Minute'].str.contains('45+')]
FirstHalf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4 entries, 40 to 43
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Minute      4 non-null      object
 1   # of Goals  4 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 96.0+ bytes


In [195]:
FirstHalf.groupby(['Minute'])['# of Goals'].sum().reset_index()

Unnamed: 0,Minute,# of Goals
0,45,15
1,45+1,5
2,45+2,1
3,45+7,1
