# NumPy Queries for Indian Cricket Team Dataset

In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv("C:/Users/admin/Downloads/Indian_Cricket_Team_Dataset.csv")

In [4]:
df.head()

Unnamed: 0,Player Name,Age,Matches Played,Batting Average,Strike Rate
0,Player_1,26,456,27.7,188.21
1,Player_2,23,154,24.61,139.34
2,Player_3,32,210,36.87,182.79
3,Player_4,34,426,51.81,178.77
4,Player_5,30,221,49.8,162.44


In [5]:
df.head(20)

Unnamed: 0,Player Name,Age,Matches Played,Batting Average,Strike Rate
0,Player_1,26,456,27.7,188.21
1,Player_2,23,154,24.61,139.34
2,Player_3,32,210,36.87,182.79
3,Player_4,34,426,51.81,178.77
4,Player_5,30,221,49.8,162.44
5,Player_6,27,470,22.2,149.45
6,Player_7,32,229,38.21,197.96
7,Player_8,24,249,40.9,114.5
8,Player_9,26,422,45.78,172.52
9,Player_10,29,406,46.01,119.12


# Query 1: Convert 'Age' column to a NumPy array

In [6]:
df.Age

0      26
1      23
2      32
3      34
4      30
       ..
995    24
996    28
997    23
998    35
999    30
Name: Age, Length: 1000, dtype: int64

In [7]:
df[['Age']]

Unnamed: 0,Age
0,26
1,23
2,32
3,34
4,30
...,...
995,24
996,28
997,23
998,35


# Query 2: Find the average age of players.

In [8]:
x = df['Age'].mean()
print(x)

27.57


# Query 3: Find the oldest and youngest player age.

In [17]:
max_age=np.array(df['Age'].max())
print(max_age)
min_age=np.array(df['Age'].min())
print(min_age)

35
20


# Query 4: Count the total number of players above 30 years old.

In [10]:
count = df[(df.Age > 30)]
print(count)

    Player Name  Age  Matches Played  Batting Average  Strike Rate
2      Player_3   32             210            36.87       182.79
3      Player_4   34             426            51.81       178.77
6      Player_7   32             229            38.21       197.96
24    Player_25   31             251            55.07       154.25
25    Player_26   33             260            36.70       179.17
..          ...  ...             ...              ...          ...
986  Player_987   31              25            30.73       124.75
991  Player_992   31             478            42.48       119.14
993  Player_994   31             259            32.61       193.50
994  Player_995   31             400            50.87       143.05
998  Player_999   35              16            41.93       135.59

[336 rows x 5 columns]


In [18]:
count=np.array((df["Age"]>30).sum())
print(count)

336


# Query 5: Find the median batting average.

In [12]:
df[['Batting Average']]

Unnamed: 0,Batting Average
0,27.70
1,24.61
2,36.87
3,51.81
4,49.80
...,...
995,23.46
996,29.47
997,49.30
998,41.93


In [13]:
median = df['Batting Average'].median()
print(median)

40.135


# Query 6: Find the standard deviation of strike rates

In [14]:
df.head()

Unnamed: 0,Player Name,Age,Matches Played,Batting Average,Strike Rate
0,Player_1,26,456,27.7,188.21
1,Player_2,23,154,24.61,139.34
2,Player_3,32,210,36.87,182.79
3,Player_4,34,426,51.81,178.77
4,Player_5,30,221,49.8,162.44


In [19]:
str=np.array(df['Strike Rate'].std())
print(str)

29.122759125573694


# Query 7: Find the 90th percentile of batting average.

In [24]:
b_avg=np.array(df["Batting Average"])
p=np.percentile(b_avg,90)
print(p)

55.323


# Query 8: Find the number of players with strike rate above 150.

In [25]:
s=np.array((df["Strike Rate"]>150).sum())
print(s)

513


# Query 9: Reshape the first 20 batting averages into a 4x5 matrix.

In [27]:
df.head(20)

Unnamed: 0,Player Name,Age,Matches Played,Batting Average,Strike Rate
0,Player_1,26,456,27.7,188.21
1,Player_2,23,154,24.61,139.34
2,Player_3,32,210,36.87,182.79
3,Player_4,34,426,51.81,178.77
4,Player_5,30,221,49.8,162.44
5,Player_6,27,470,22.2,149.45
6,Player_7,32,229,38.21,197.96
7,Player_8,24,249,40.9,114.5
8,Player_9,26,422,45.78,172.52
9,Player_10,29,406,46.01,119.12


In [34]:
batting = np.array(df['Batting Average'].head(20))
print(batting)

print(batting.shape)

avg = batting.reshape(5,4)
print(avg)

[27.7  24.61 36.87 51.81 49.8  22.2  38.21 40.9  45.78 46.01 34.59 42.43
 39.24 55.4  41.21 37.64 36.18 42.9  52.16 41.54]
(20,)
[[27.7  24.61 36.87 51.81]
 [49.8  22.2  38.21 40.9 ]
 [45.78 46.01 34.59 42.43]
 [39.24 55.4  41.21 37.64]
 [36.18 42.9  52.16 41.54]]


# Query 10: Compute row-wise sum of reshaped matrix.

In [37]:
row = np.sum(avg, axis = 1)
print(row)

[140.99 151.11 168.81 173.49 172.78]


# Query 11: Compute column-wise mean of reshaped matrix.

In [39]:
column = np.sum(avg, axis = 0)
print(column)

[198.7  191.12 203.04 214.32]


# Query 12: Transpose the reshaped matrix.

In [40]:
transpose = avg.T
print(transpose)

[[27.7  49.8  45.78 39.24 36.18]
 [24.61 22.2  46.01 55.4  42.9 ]
 [36.87 38.21 34.59 41.21 52.16]
 [51.81 40.9  42.43 37.64 41.54]]


# Query 13: Find the variance of batting average.

In [41]:
variance=np.array(df["Batting Average"].var())
print(variance)

131.6715008167167


# Query 14: Stack age and matches played horizontally

In [43]:
n = np.array(df["Age"])
n1 = np.array(df["Matches Played"])
stacked = np.stack((n,n1))
print(stacked)

[[ 26  23  32 ...  23  35  30]
 [456 154 210 ... 126  16 146]]


# Query 15: Split the batting average array into 5 equal parts.

In [44]:
df.head()

Unnamed: 0,Player Name,Age,Matches Played,Batting Average,Strike Rate
0,Player_1,26,456,27.7,188.21
1,Player_2,23,154,24.61,139.34
2,Player_3,32,210,36.87,182.79
3,Player_4,34,426,51.81,178.77
4,Player_5,30,221,49.8,162.44


In [45]:
n = np.array(df["Batting Average"])
split = np.split(n,5)
print(split)

[array([27.7 , 24.61, 36.87, 51.81, 49.8 , 22.2 , 38.21, 40.9 , 45.78,
       46.01, 34.59, 42.43, 39.24, 55.4 , 41.21, 37.64, 36.18, 42.9 ,
       52.16, 41.54, 46.47, 49.51, 40.81, 37.07, 55.07, 36.7 , 38.48,
       59.61, 20.01, 27.42, 35.36, 57.3 , 22.8 , 20.38, 22.11, 23.54,
       21.5 , 39.19, 43.23, 30.87, 35.93, 23.67, 33.45, 40.9 , 49.29,
       20.13, 38.69, 31.88, 54.11, 48.62, 43.56, 31.09, 56.41, 21.8 ,
       24.38, 35.67, 24.98, 58.26, 51.92, 30.36, 43.54, 59.29, 55.37,
       44.03, 56.15, 59.57, 49.73, 22.6 , 36.06, 53.51, 29.22, 53.23,
       24.81, 21.88, 35.36, 21.47, 58.29, 53.05, 52.03, 45.19, 28.65,
       40.73, 43.89, 41.  , 30.41, 40.61, 39.65, 59.85, 55.9 , 38.54,
       44.92, 49.91, 21.39, 55.8 , 54.41, 38.3 , 35.65, 30.92, 39.07,
       21.78, 54.09, 21.42, 33.16, 57.26, 49.41, 31.26, 30.11, 20.2 ,
       47.83, 21.95, 41.3 , 58.48, 24.38, 55.78, 59.66, 22.46, 55.32,
       40.64, 56.26, 42.85, 46.48, 41.75, 54.58, 49.36, 40.85, 54.73,
       29.81, 26.29