/
basketballapp.py
92 lines (74 loc) · 4.71 KB
/
basketballapp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf-8 -*-
"""Untitled16.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1d3VvGtnYUEuoP2ivZ_Y7Vm6djFaZBBCo
"""
"""NBA PLAYER STATS EXPLORER"""
# The player data is fetched dynamically from the internet by web scraping, which is done with pandas.
# Import the required libraries.
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import base64 #this library is used to data downloadas csv file bcoz its gng to convert the aski to byteconversion
# Page title shown at the top of the app.
st.title('NBA Player Stats Explorer')

# Short Markdown description of the app and its data source.
app_description = """
This app performs simple webscrapping of NBA players stats data!
* **Python libraries :** base64, pandas, streamlit
* **Data source :** [Basketball-reference.com](https://www.basketball-reference.com/)
"""
st.markdown(app_description)

# Sidebar: a heading followed by a dropdown for the season.
st.sidebar.header('User Input Features')
# Years 2021 down to 1950, listed newest first so the latest season is at the top.
season_choices = list(range(2021, 1949, -1))
selected_year = st.sidebar.selectbox('Year', season_choices)
# This block performs the web scraping and the data preprocessing.
# Web scraping of NBA player stats.
@st.cache
def load_data(year):
    """Scrape and clean the per-game player stats table for one NBA season.

    Args:
        year: Season year used to build the Basketball-Reference page URL
            (converted with ``str``).

    Returns:
        A pandas DataFrame built from the first HTML table on the page, with
        rows whose ``Age`` cell is the literal text ``'Age'`` removed, missing
        values replaced by 0 and the ``Rk`` column dropped.
    """
    # Season pages live under "/leagues/" (plural); "/league/" is not a valid path.
    url = f"https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html"
    # read_html returns one DataFrame per <table>; the stats table is the first one.
    tables = pd.read_html(url, header=0)
    df = tables[0]
    # Drop rows where the Age cell holds the column name itself
    # (presumably header rows repeated inside the table body).
    raw = df.drop(df[df.Age == 'Age'].index)
    raw = raw.fillna(0)
    # 'Rk' is only a row counter on the page and carries no player information.
    playerstats = raw.drop(['Rk'], axis=1)
    return playerstats
# Load the preprocessed stats for the season chosen in the sidebar dropdown.
playerstats = load_data(selected_year)

# Team filter: alphabetically sorted unique team codes, all selected by default.
sorted_unique_team = sorted(playerstats['Tm'].unique())
selected_team = st.sidebar.multiselect(
    'Team',
    options=sorted_unique_team,
    default=sorted_unique_team,
)

# Position filter: the list is passed as both the options and the default,
# so every position starts out selected.
unique_pos = ['C', 'PF', 'SF', 'PG', 'SG']
selected_pos = st.sidebar.multiselect(
    'Position',
    options=unique_pos,
    default=unique_pos,
)

# NOTE(review): the next line is a bare string statement, not a comment;
# Streamlit may render it on the page -- confirm whether that is intended.
"""whenever we change the year for the first time it takes time to load thn it will be instantaneous that is it uses cache files"""
# Filter the data based on the sidebar selection: keep only the rows whose
# team AND position are both among the selected values.
team_mask = playerstats.Tm.isin(selected_team)
pos_mask = playerstats.Pos.isin(selected_pos)
df_selected_team = playerstats[team_mask & pos_mask]

st.header('Display Player Stats of selected Team(s)')
# Convert each dimension to str on its own before concatenating; wrapping the
# whole expression in a single str() added an int to a str and raised TypeError.
n_rows, n_cols = df_selected_team.shape
st.write('Data Dimension: ' + str(n_rows) + ' rows and ' + str(n_cols) + ' columns.')
st.dataframe(df_selected_team)
# Download the NBA player stats as a CSV file.
# The code is adapted from: https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806
def filedownload(df):
    """Build an HTML download link that embeds ``df`` as a CSV file.

    The frame is serialised to CSV without its index, base64-encoded and
    placed in a ``data:`` URI, so no file is written anywhere.

    Args:
        df: Object exposing ``to_csv(index=False)`` (a pandas DataFrame).

    Returns:
        An ``<a>`` tag string whose ``download`` attribute is
        ``playerstats.csv``.
    """
    # str -> bytes for base64, then bytes -> str so it can be put in the URI.
    csv_bytes = df.to_csv(index=False).encode()
    payload = base64.b64encode(csv_bytes).decode()
    return (
        '<a href="data:file/csv;base64,' + payload + '"'
        ' download="playerstats.csv">Download CSV File</a>'
    )
# Render the CSV download link for the filtered table. unsafe_allow_html is
# required because the link is passed to st.markdown as a raw <a> tag.
st.markdown(filedownload(df_selected_team), unsafe_allow_html=True)


def _numeric_if_possible(column):
    """Return ``column`` converted to a numeric dtype, or unchanged if it cannot be."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # Column holds non-numeric text; leave it alone.
        return column


# Heatmap of the correlations between the numeric stat columns.
if st.button('Intercorrelation Heatmap'):
    st.header('Intercorrelation Matrix Heatmap')
    # The filtered frame could not be used directly for the correlation (the
    # original worked around this by writing output.csv and reading it back so
    # pandas re-inferred the dtypes). Convert the columns in memory instead, so
    # no file is left on disk or shared between sessions, and keep only the
    # numeric columns so text columns do not reach corr().
    numeric_stats = (
        df_selected_team
        .apply(_numeric_if_possible)
        .select_dtypes(include='number')
    )
    if numeric_stats.empty:
        # Nothing selected (or no numeric columns): a heatmap cannot be drawn.
        st.warning('No numeric data available for the current selection.')
    else:
        corr = numeric_stats.corr()
        # Hide the upper triangle (diagonal included); the matrix is symmetric,
        # so it only repeats the lower triangle.
        mask = np.triu(np.ones(corr.shape, dtype=bool))
        with sns.axes_style("white"):
            f, ax = plt.subplots(figsize=(7, 5))
            sns.heatmap(corr, mask=mask, vmax=1, square=True, ax=ax)
        # Pass the figure explicitly instead of relying on the global pyplot figure.
        st.pyplot(f)