# ***ANALYSIS ON ZUPEE TRUMP CARDS MANIA GAME***

---




***PART 1: EXTRACTING THE DATA FROM THE WEB.***

In [3]:
# Installing the necessary packages for extracting web data
!pip install beautifulsoup4
!pip install requests



In [4]:
# The link for downloading the stats
link = "https://www.zupee.com/trump-cards-mania-more-information/"

In [5]:
# Importing the packages
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

In [6]:
# Send a GET request to the web page
url = link
# A timeout keeps the cell from hanging indefinitely if the site never answers
response = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page as if it held the stats
response.raise_for_status()
html_content = response.content

In [7]:
# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

In [8]:
# Find all the table elements on the page, because the data is stored as a table on the web page,
# you can inspect that web page to confirm
tables = soup.find_all('table')

In [9]:
# Define the headers for the CSV file or the dataframe
headers = ['Name', 'Matches', 'Runs', '6s', '4s', 'Strike Rate', 'High Score']

# Create a list to store the table data
data = []

In [10]:
# Start from an empty list in this cell so that re-running it does not
# append the same rows a second time
data = []

# Iterate over each table
for table in tables:
    for row in table.find_all('tr'):
        columns = row.find_all('td')
        # Ignore rows that don't have the expected number of <td> cells
        if len(columns) != len(headers):
            continue
        # Extract the whitespace-trimmed text from each cell
        data.append([column.text.strip() for column in columns])

In [11]:
# do you want to see how the data looks right now as a list
data

[['Virat Kohli', '237', '7263', '235', '644', '130.04', '113'],
 ['Shikhar Dhawan', '217', '6617', '149', '750', '127.18', '106'],
 ['David Warner', '176', '6397', '226', '646', '139.92', '126'],
 ['Rohit Sharma', '243', '6211', '258', '554', '130.05', '109'],
 ['Suresh Raina', '205', '5528', '203', '506', '136.7', '100'],
 ['AB de Villiers', '184', '5162', '251', '413', '151.69', '133'],
 ['MS Dhoni', '250', '5082', '239', '349', '135.92', '84'],
 ['Chris Gayle', '142', '4965', '357', '405', '148.96', '175'],
 ['Robin Uthappa', '205', '4952', '182', '481', '130.35', '88'],
 ['Dinesh Karthik', '242', '4516', '139', '439', '132.71', '97'],
 ['Ajinkya Rahane', '172', '4400', '96', '455', '123.42', '105'],
 ['Gautam Gambhir', '154', '4217', '59', '492', '123.88', '93'],
 ['Ambati Rayudu', '204', '4348', '173', '359', '127.54', '100'],
 ['KL Rahul', '118', '4163', '168', '355', '134.42', '132'],
 ['Faf du Plessis', '130', '4133', '145', '375', '134.14', '96'],
 ['Sanju Samson', '152', '388

In [12]:
# Saving the data as a CSV file
filename = 'zupeeData.csv'
# newline='' lets the csv module control the line endings; the explicit encoding
# keeps the file readable regardless of the platform's default encoding
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)
    writer.writerows(data)

print(f"Data has been successfully stored in {filename}.")

Data has been successfully stored in zupeeData.csv.


**That's it! We have successfully downloaded the complete stats for the Trump Cards Mania game on Zupee.**

***PART 2: READING THE DATA AS CSV THEN CONVERTING IT INTO A DATAFRAME.***

***Now that we have the data, we can do many things with it.
So, why wait?
Let's GOOO!***

In [18]:
# Let's create a dataframe to get comfortable with the data
# Read the file from the relative path it was saved to, instead of the
# Colab-only absolute path '/content/...', so the notebook also runs elsewhere
data = pd.read_csv('zupeeData.csv')

In [15]:
# View the top 5 data
data.head(5)

Unnamed: 0,Name,Matches,Runs,6s,4s,Strike Rate,High Score
0,Virat Kohli,237,7263,235,644,130.04,113
1,Shikhar Dhawan,217,6617,149,750,127.18,106
2,David Warner,176,6397,226,646,139.92,126
3,Rohit Sharma,243,6211,258,554,130.05,109
4,Suresh Raina,205,5528,203,506,136.7,100


In [16]:
# For easier matching later on, trim surrounding whitespace from the player names and lower-case them
data["Name"] = data["Name"].str.strip().str.lower()
data.head(5)

Unnamed: 0,Name,Matches,Runs,6s,4s,Strike Rate,High Score
0,virat kohli,237,7263,235,644,130.04,113
1,shikhar dhawan,217,6617,149,750,127.18,106
2,david warner,176,6397,226,646,139.92,126
3,rohit sharma,243,6211,258,554,130.05,109
4,suresh raina,205,5528,203,506,136.7,100


***PART 3: DIVIDING THE DATA INTO 3 DIFFERENT CATEGORIES, as per Zupee.***

In [17]:
'''Let's divide our data as the player category on zupee
Distributions are as follows:
Top 30: Gold
Next 30: Silver
Remaining: Bronze'''

# Gold DataFrame
goldPlayers = data.iloc[:30].reset_index(drop=True)
goldPlayers.head(5)

Unnamed: 0,Name,Matches,Runs,6s,4s,Strike Rate,High Score
0,virat kohli,237,7263,235,644,130.04,113
1,shikhar dhawan,217,6617,149,750,127.18,106
2,david warner,176,6397,226,646,139.92,126
3,rohit sharma,243,6211,258,554,130.05,109
4,suresh raina,205,5528,203,506,136.7,100


In [18]:
# Silver DataFrame
silverPlayers = data.iloc[30:60].reset_index(drop=True)
silverPlayers.head(5)

Unnamed: 0,Name,Matches,Runs,6s,4s,Strike Rate,High Score
0,virender sehwag,104,2728,106,334,155.44,122
1,glenn maxwell,124,2719,158,227,157.62,95
2,david miller,121,2714,126,187,138.4,101
3,ravindra jadeja,226,2692,99,193,128.62,62
4,murali vijay,106,2619,91,247,121.87,127


In [19]:
# Bronze DataFrame
bronzePlayers = data.iloc[60:].reset_index(drop=True)
bronzePlayers.head(5)

Unnamed: 0,Name,Matches,Runs,6s,4s,Strike Rate,High Score
0,devdutt padikkal,57,1521,42,167,125.39,101
1,krunal pandya,113,1514,56,136,133.39,86
2,karun nair,76,1496,39,161,127.75,83
3,saurabh tiwary,93,1494,50,111,120.1,61
4,marcus stoinis,82,1478,75,111,140.63,89


***PART 4: CAPTAINCY SELECTION FUNCTION ACROSS CATEGORIES.***

In [20]:
"""Lets try to create a function that will create a new column on our dataframe
that shows the maximized value for selecting that player as a captain on respective categories"""

# Starting with bronze
# bronzeMatches = [m for m in bronzePlayers['Matches']]
# bronzeRuns = [r for r in bronzePlayers['Runs']]
# bronzeSixes = [s for s in bronzePlayers['6s']]
# bronzeFours = [f for f in bronzePlayers['4s']]
# bronzeSR = [sr for sr in bronzePlayers['Strike Rate']]
# bronzeHS = [hs for hs in bronzePlayers['High Score']]

def countChances(i):
  """Count the stat comparisons that bronze player ``i`` wins.

  The player at row position ``i`` of ``bronzePlayers`` is compared with
  every row on each of the six stat columns. Each strictly greater value
  adds one to the result.

  Args:
    i: Zero-based row position of the player in ``bronzePlayers``.

  Returns:
    int: Number of (other player, stat) pairs in which player ``i`` holds
    the strictly higher value. Ties do not count.
  """
  statColumns = ['Matches', 'Runs', '6s', '4s', 'Strike Rate', 'High Score']
  stats = bronzePlayers[statColumns]
  # One row-versus-frame comparison replaces the nested Python loop.
  # The comparison is strict, so the player's own row never scores and
  # needs no special-casing. iloc makes the lookup positional, so it does
  # not depend on the frame having a 0..n-1 index.
  beaten = stats.lt(stats.iloc[i], axis='columns')
  return int(beaten.to_numpy().sum())


bronzePlayers['chances'] = [countChances(i) for i in range(len(bronzePlayers))]
bronzePlayers.tail(7)

Unnamed: 0,Name,Matches,Runs,6s,4s,Strike Rate,High Score,chances
33,venkatesh iyer,36,956,42,86,130.25,104,92
34,cameron white,47,954,36,76,126.36,78,64
35,ben stokes,45,935,32,81,133.95,107,92
36,devon conway,23,924,30,100,141.28,92,84
37,abhishek sharma,47,893,31,92,137.38,75,79
38,herschelle gibbs,36,886,31,83,110.06,69,28
39,stuart binny,95,880,35,66,128.84,48,59


In [21]:
# Wonder who's got the best captaincy rate in bronze category

print("\n With Name only \n", bronzePlayers.nlargest(5, 'chances')['Name'])

print("\n Whole Dataframe \n", bronzePlayers.nlargest(5, 'chances'))


 With Name only 
 1        krunal pandya
4       marcus stoinis
24        sunil narine
16    yashasvi jaiswal
13      jonny bairstow
Name: Name, dtype: object

 Whole Dataframe 
                 Name  Matches  Runs  6s   4s  Strike Rate  High Score  chances
1      krunal pandya      113  1514  56  136       133.39          86      183
4     marcus stoinis       82  1478  75  111       140.63          89      179
24      sunil narine      162  1046  64  114       159.69          75      168
16  yashasvi jaiswal       37  1172  48  145       148.73         124      164
13    jonny bairstow       39  1291  55  133       142.65         114      162


In [22]:
# Doing the same for the Silver DataFrame.
# In fact, we can generalize the counting function so it works on any category DataFrame; let's see how.

def countChancesOptimized(i, df):
  """Count the stat comparisons that the player at position ``i`` wins.

  The player at row position ``i`` of ``df`` is compared with every row on
  each of the six stat columns. Each strictly greater value adds one to
  the result.

  Args:
    i: Zero-based row position of the player in ``df``.
    df: DataFrame holding the columns 'Matches', 'Runs', '6s', '4s',
      'Strike Rate' and 'High Score'.

  Returns:
    int: Number of (other player, stat) pairs in which player ``i`` holds
    the strictly higher value. Ties do not count.
  """
  statColumns = ['Matches', 'Runs', '6s', '4s', 'Strike Rate', 'High Score']
  stats = df[statColumns]
  # One row-versus-frame comparison replaces the nested Python loop.
  # The comparison is strict, so the player's own row never scores and
  # needs no special-casing. iloc makes the lookup positional, so frames
  # whose index is not 0..n-1 (e.g. a filtered slice) work as well.
  beaten = stats.lt(stats.iloc[i], axis='columns')
  return int(beaten.to_numpy().sum())

In [23]:
# Now we can do for silver and gold data simultaneously

silverPlayers['chances'] = [countChancesOptimized(i, silverPlayers) for i in range(len(silverPlayers))]
goldPlayers['chances'] = [countChancesOptimized(i, goldPlayers) for i in range(len(goldPlayers))]

In [24]:
# all set. did we do it? lets confirm.

print("Silver Dataframe \n", silverPlayers.head(5))

print("\nGold Dataframe \n", goldPlayers.head(5))

Silver Dataframe 
               Name  Matches  Runs   6s   4s  Strike Rate  High Score  chances
0  virender sehwag      104  2728  106  334       155.44         122      154
1    glenn maxwell      124  2719  158  227       157.62          95      146
2     david miller      121  2714  126  187       138.40         101      129
3  ravindra jadeja      226  2692   99  193       128.62          62      100
4     murali vijay      106  2619   91  247       121.87         127      121

Gold Dataframe 
              Name  Matches  Runs   6s   4s  Strike Rate  High Score  chances
0     virat kohli      237  7263  235  644       130.04         113      132
1  shikhar dhawan      217  6617  149  750       127.18         106      114
2    david warner      176  6397  226  646       139.92         126      142
3    rohit sharma      243  6211  258  554       130.05         109      133
4    suresh raina      205  5528  203  506       136.70         100      123


In [25]:
# Aye Aye
# Now then see who are top captaincy choice in silver and gold category

print("\n Silver: \n", silverPlayers.nlargest(5, 'chances')[['Name', 'chances']])

print("\n Gold: \n", goldPlayers.nlargest(5, 'chances')[['Name', 'chances']].reset_index(drop=True))


 Silver: 
               Name  chances
0  virender sehwag      154
1    glenn maxwell      146
5   mayank agarwal      133
2     david miller      129
4     murali vijay      121

 Gold: 
              Name  chances
0  ab de villiers      146
1    david warner      142
2     chris gayle      138
3    rohit sharma      133
4     virat kohli      132


In [24]:
# now we can try to create a function that will display the best chances of your captain

In [26]:
def compare_players():
    """Prompt for player names and print each one's captaincy standing.

    Every space-separated search term is looked up as a case-insensitive
    substring of the 'Name' column, first in the Gold, then the Silver and
    finally the Bronze DataFrame. For the first matching row the function
    prints that player's 'chances' value and how many players of the same
    category have a strictly lower value. Terms that match nowhere are
    reported as not found.

    Reads from standard input and prints to standard output; returns None.
    """
    categories = [
        ('Gold', goldPlayers[['Name', 'chances']]),
        ('Silver', silverPlayers[['Name', 'chances']]),
        ('Bronze', bronzePlayers[['Name', 'chances']]),
    ]

    player_input = input("Enter space-separated player names: ")
    player_names = player_input.split()

    for player_name in player_names:
        player_found = False
        for category, df in categories:
            # regex=False: the term is plain text, so characters such as '(' or '+'
            # cannot raise a regex error. na=False: rows without a name never match.
            is_match = df['Name'].str.contains(player_name, case=False, regex=False, na=False)
            player_matches = df[is_match]
            if not player_matches.empty:
                player_found = True
                # Report the matched player's full name: a short term can match
                # someone other than the player the user had in mind.
                matched_name = player_matches['Name'].iloc[0]
                player_chances = player_matches['chances'].iloc[0]
                count = int((df['chances'] < player_chances).sum())
                total_players = len(df)
                print(f"{matched_name.title()} has {player_chances} value and beats {count} other players out of {total_players} players in {category} category.")
                break
        if not player_found:
            print(f"Player '{player_name.title()}' not found in any category.")

compare_players()


Enter space-separated player names: ms rohit kohli ponting ab russell narine 
Ms has 110 value and beats 22 other players out of 30 players in Gold category.
Rohit has 133 value and beats 26 other players out of 30 players in Gold category.
Kohli has 132 value and beats 25 other players out of 30 players in Gold category.
Player 'Ponting' not found in any category.
Ab has 146 value and beats 29 other players out of 30 players in Gold category.
Russell has 106 value and beats 22 other players out of 30 players in Silver category.
Narine has 168 value and beats 37 other players out of 40 players in Bronze category.


In [27]:
# ^^^ That gives now your captaincy options solution ^^^

***PART 5: PERFORMANCE MEASURE OF A PLAYER AGAINST OTHERS.***

In [28]:
# for showing the data now in tabular way
!pip install tabulate



In [29]:
from tabulate import tabulate

In [30]:
# Lets now create player performance that will help you to select appropriate option

In [31]:
def performanceMeasure():
  """Prompt for player names and print a per-stat comparison table.

  Every space-separated search term is looked up as a case-insensitive
  substring of the 'Name' column, first in the Gold, then the Silver and
  finally the Bronze DataFrame. For the first matching row, each stat cell
  of the table holds the number of players in that category whose value
  for the stat is strictly lower. Terms that match nowhere get a row whose
  stat cells are empty and whose last column says so.

  Reads from standard input and prints to standard output; returns None.
  """
  player_input = input("Enter space-separated player names: ")
  playerNames = player_input.split()

  categories = [('Gold', goldPlayers), ('Silver', silverPlayers), ('Bronze', bronzePlayers)]
  statColumns = ['Matches', 'Runs', '6s', '4s', 'Strike Rate', 'High Score']
  headers = ['Player Name'] + statColumns + ['Category']
  rows = []

  # Category sizes are read from the DataFrames so the text cannot drift from the data
  print(f'''The below table shows how a player beats other players in specific ways in their respective categories.\n
        Gold Category has a total {len(goldPlayers)} players, Silver Category has a total {len(silverPlayers)} players and Bronze Category has a total {len(bronzePlayers)} players.''' )

  for playerName in playerNames:
    # Default row for an unknown player: the stat cells stay empty and the message
    # goes in the last column, so it does not turn a numeric column into text.
    row = [playerName] + [None] * len(statColumns) + ["Not found in any category"]

    for category, df in categories:
      # regex=False: the term is plain text; na=False: rows without a name never match
      isMatch = df['Name'].str.contains(playerName, case=False, regex=False, na=False)
      playerMatched = df[isMatch]
      if not playerMatched.empty:
        player_ = playerMatched.iloc[0]
        # Per stat: how many players of this category have a strictly lower value
        beatenCounts = [int((df[column] < player_[column]).sum()) for column in statColumns]
        row = [player_['Name'].title()] + beatenCounts + [category]
        break

    rows.append(row)

  print(tabulate(rows, headers=headers, tablefmt='fancy_grid'))



performanceMeasure()

Enter space-separated player names: ab raina cena rohit gayle kohli jais gill
The below table shows how a player beats other players in specific ways in their respective categories.

        Gold Category has a total 30 players, Silver Category has a total 30 players and Bronze Category has a total 40 players.
╒══════════════════╤═══════════════════════════╤════════╤══════╤══════╤═══════════════╤══════════════╤════════════╕
│ Player Name      │ Matches                   │   Runs │   6s │   4s │   Strike Rate │   High Score │ Category   │
╞══════════════════╪═══════════════════════════╪════════╪══════╪══════╪═══════════════╪══════════════╪════════════╡
│ Ab De Villiers   │ 20                        │     24 │   27 │   20 │            29 │           26 │ Gold       │
├──────────────────┼───────────────────────────┼────────┼──────┼──────┼───────────────┼──────────────┼────────────┤
│ Suresh Raina     │ 23                        │     25 │   22 │   25 │            19 │            9 │ Gold 

In [32]:
'''thats all the analytical help can be done to make you select the best choice for captaincy, vice captaincy and for selecting
the appropriate option for a player when playing live'''

'thats all the analytical help can be done to make you select the best choice for captaincy, vice captaincy and for selecting \nthe appropriate option for a player when playing live'

**PART 6: VISUAL REPRESENTATION.**

You can skip this section.

In [33]:
# do you want to see who are the top 10 runs scorers throughout the data?
# remember all the data is available in the dataframe named data
topTenRunsScores = data.nlargest(10, 'Runs')[['Name', 'Runs']].reset_index(drop=True)
# Number the rows 1..10 by rank. The index is reset first because the scraped table
# is not strictly ordered by runs, so "original row label + 1" is not always the rank.
topTenRunsScores.index += 1
topTenRunsScores

Unnamed: 0,Name,Runs
1,virat kohli,7263
2,shikhar dhawan,6617
3,david warner,6397
4,rohit sharma,6211
5,suresh raina,5528
6,ab de villiers,5162
7,ms dhoni,5082
8,chris gayle,4965
9,robin uthappa,4952
10,dinesh karthik,4516


In [34]:
import plotly.graph_objects as go

scorersName = topTenRunsScores['Name'].str.title()
scorersRun = topTenRunsScores['Runs']

# Create a bar graph
fig = go.Figure(data=go.Bar(
    x=scorersRun,
    y=scorersName,
    orientation='h',
    hovertemplate='Runs: %{x}',
    marker_color='skyblue'
))

# Customize the layout
fig.update_layout(
    title='Top Ten Run Scores',
    xaxis_title='Runs',
    yaxis_title='Players'
)

# Show the figure
fig.show()

In [20]:
# A Player vs remaining others average record.

import plotly.graph_objects as go

# Get player names from user input
player_names = input("Enter the names of the players (space-separated): ").split()

# Only the numeric stat columns can be averaged and plotted (this leaves out 'Name')
columns = data.select_dtypes(include='number').columns

# Values and display names of the players picked by the user, in input order
selected_players_values = []
selected_players_labels = []
# Marks the picked rows so they can be left out of the "others" average
is_selected = pd.Series(False, index=data.index)

# Iterate over the player names
for player_name in player_names:
    # Literal, case-insensitive substring match; rows without a name never match
    name_matches = data['Name'].str.contains(player_name, case=False, regex=False, na=False)
    selected_player = data[name_matches]
    if selected_player.empty:
        continue
    first_match = selected_player.iloc[0]
    if is_selected.at[first_match.name]:
        # Two search terms resolved to the same player; plot that player only once
        continue
    is_selected.at[first_match.name] = True
    selected_players_values.append(first_match[columns])
    selected_players_labels.append(first_match['Name'].title())

if not selected_players_values:
    # Just report it; calling exit() inside a notebook would shut the kernel down
    print("No players found in the dataframe.")
else:
    # Calculate average of remaining players' data. The picked rows are excluded by
    # row, because the search terms are name fragments and never equal a full name.
    remaining_players = data.loc[~is_selected, columns]
    average_values = remaining_players.mean()

    # Create one trace per selected player, labelled with the matched full name
    selected_players_traces = [
        go.Bar(
            x=columns,
            y=player_values,
            name=player_label,
            hovertemplate="Value: %{y}"
        )
        for player_values, player_label in zip(selected_players_values, selected_players_labels)
    ]

    # Create the trace for average values
    average_trace = go.Bar(
        x=columns,
        y=average_values,
        name='Average',
        hovertemplate="Value: %{y}"
    )

    # Create the data list with all traces
    datas = selected_players_traces + [average_trace]

    # Set the layout options
    layout = go.Layout(
        title='Comparison: Selected Players vs Average of Others',
        xaxis=dict(title='Category'),
        yaxis=dict(title='Value')
    )

    # Create the figure
    fig = go.Figure(data=datas, layout=layout)

    # Display the chart
    fig.show()


Enter the names of the players (space-separated): faf




