In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the auction data from a CSV file located relative to the working directory.
df = pd.read_csv("IPLPlayerAuctionData.csv")

# Preview: a caption line followed by the first five records.
print("Displaying first 5 rows of the dataset:", df.head(), sep="\n")

# Column dtypes as inferred by pandas while parsing the CSV.
print("\nData types of the columns:", df.dtypes, sep="\n")

# Frame dimensions, taken from the row index and the column index respectively.
rows = len(df.index)
cols = len(df.columns)
print(f"\nShape of the dataset:\nNo. of rows: {rows}, No. of columns: {cols}\n")

In [None]:
# Pull the 'Amount' column out of the DataFrame as a 1-D NumPy array.
amount = np.array(df['Amount'])
print("Numpy array of Amount column:", amount[:5])

# Array indexing: position 0 is the first element, -1 counts back from the end.
print("First element in Amount array:", amount[0])
print("Last element in Amount array:", amount[-1])

# Array slicing: the stop index is exclusive, so [2:5] yields positions 2, 3 and 4.
# The label says "2 to 4" to match what is actually printed (and the Series
# slicing label used later in the notebook).
print("Amount elements from index 2 to 4:", amount[2:5])

# Reshape the 1-D array into a single-column 2-D array.
# -1 lets NumPy infer the row count from the array's size.
print("Original shape of Amount array:", amount.shape)
reshaped_amount = amount.reshape(-1, 1)
print("Reshaped Amount array (first 5 rows):\n", reshaped_amount[:5])

In [None]:
# --- Concatenation and splitting ---
# Join the first five amounts with two extra literal values.
concatenated = np.concatenate([amount[:5], [10_000_000, 15_000_000]])
print("Concatenated array:", concatenated)

# array_split also works when the length is not evenly divisible by 2.
split_amount = np.array_split(amount, 2)
first_part, second_part = split_amount
print("Split arrays (first part):", first_part[:5])
print("Split arrays (second part):", second_part[:5])

# --- Universal functions ---
# np.square is applied element-wise across the whole array.
squared_amount = np.square(amount)
print("Squared Amount values (first 5):", squared_amount[:5])

# --- Aggregation ---
mean_amount = amount.mean()
max_amount = amount.max()
print("Mean Amount:", mean_amount)
print("Max Amount:", max_amount)

# --- Broadcasting ---
# The scalar is broadcast against every element of the array.
plus_10m = amount + 10_000_000
print("Amount plus 10 million (first 5):", plus_10m[:5])

# --- Comparisons ---
# Element-wise comparison produces a boolean array of the same length.
high_value = amount > 50_000_000
print("Boolean array for Amount > 50 million:\n", high_value[:10])

# --- Boolean masks ---
# Indexing with the boolean array keeps only the positions that are True.
high_value_players = amount[high_value]
print("Amounts greater than 50 million (first 10):", high_value_players[:10])

# --- Fancy indexing ---
# A list of integer positions selects exactly those elements, in that order.
indices = [0, 5, 10]
print("Amounts at indices 0, 5, 10:", amount[indices])

# --- Sorting ---
# np.sort returns a sorted copy; `amount` itself is left untouched.
sorted_amount = np.sort(amount)
print("Sorted Amounts (first 5):", sorted_amount[:5])

# argsort gives the positions that would sort the array; indexing with them
# retrieves the corresponding values.
indices_by_amount = amount.argsort()
print("Indices sorted by Amount (first 5):", indices_by_amount[:5])
smallest_positions = indices_by_amount[:5]
print("Sorted values (first 5):", amount[smallest_positions])

In [None]:
# Section banner for the pandas part of the notebook.
print("\nPandas Operations\n")

# Series object: wrap the 'Player' column in a Series and preview it.
player_series = pd.Series(df['Player'])
print("Pandas Series of Players (first 5):", player_series.head(), sep="\n")

# DataFrame info: info() prints its own summary, so it is called directly
# rather than being wrapped in print().
print("\nDataFrame Info:")
df.info()

# Series indexing: label lookup for a single element, positional slice for a range
# (the slice stop is exclusive, hence positions 2, 3 and 4).
print("\nFirst player:", player_series.loc[0])
print("Players from index 2 to 4:", player_series.iloc[2:5], sep="\n")

In [None]:
# Column selection: keep every row, restricted to two named columns.
print("\nDataFrame with only 'Player' and 'Amount' columns:")
print(df.loc[:, ['Player', 'Amount']].head())

# Row selection by index label.
print("\nRow at index 1 of the DataFrame:", df.loc[1], sep="\n")

# Scalar selection: pick the 'Team' column, then the row at position 3.
print("\nValue in 'Team' column at index 3:", df['Team'].iloc[3], sep="\n")

# Mapping / Transformation
def add_prefix(x):
    """Return ``x`` with the label ``"Origin: "`` prepended.

    Missing values (``None`` / ``NaN``) are returned unchanged instead of
    raising ``TypeError`` from ``str + float``, so the function can be mapped
    over a column that contains gaps.

    Parameters
    ----------
    x : str
        The text to prefix.

    Returns
    -------
    str
        ``"Origin: " + x``, or ``x`` itself when it is a missing value.
    """
    # Pass missing entries through untouched so they stay detectable downstream.
    if pd.isna(x):
        return x
    return "Origin: " + x

# Apply add_prefix to every value of the 'Player Origin' column.
origin_with_prefix = df['Player Origin'].map(add_prefix)
print("\nPlayer Origin with prefix (first 5):", origin_with_prefix.head(), sep="\n")

# Series addition: build two Series that share the player-name index,
# then add them with the + operator.
player_index = df['Player']
amount_series = pd.Series(df['Amount'].values, index=player_index)
year_series = pd.Series(df['Year'].values, index=player_index)
combined_series = amount_series + year_series
print("\nAmount + Year (first 5):", combined_series.head(), sep="\n")

In [None]:
# Null values: count the missing entries in each column.
print("\nChecking for null values:", df.isna().sum(), sep="\n")

# Hierarchical indexing: a two-level index built from the 'Player' and 'Team' columns.
hierarchical_df = df.set_index(keys=['Player', 'Team'])
print("\nDataFrame with Hierarchical Index:", hierarchical_df.head(), sep="\n")

# A second, hand-written sample DataFrame keyed by player name.
sample_players = {
    'Player': ['MS Dhoni', 'Virat Kohli', 'Rohit Sharma'],
    'Nationality': ['India', 'India', 'India'],
}
df2 = pd.DataFrame(sample_players)
print("\nSecond DataFrame:", df2, sep="\n")

In [None]:
# The two frames stacked below: the first three auction rows and the sample frame.
frames_to_stack = [df.head(3), df2]

# Concatenate: stack the frames row-wise and renumber the index from 0.
concat_df = pd.concat(frames_to_stack, ignore_index=True)
print("\nConcatenated DataFrame:", concat_df, sep="\n")

# Append: expressed with concat as well, since DataFrame.append is deprecated.
append_df = pd.concat(frames_to_stack, ignore_index=True)
print("\nAppended DataFrame:", append_df, sep="\n")


# Merge (left join): keep every auction row and attach df2's columns where 'Player' matches.
merged_df = df.merge(df2, how='left', on='Player')
print("\nMerged DataFrame (Left Join):", merged_df.head(), sep="\n")

In [None]:
# Grouping & aggregation: mean auction amount for each team.
grouped_avg = df.groupby(by='Team')['Amount'].mean()
print("\nAverage Amount by Team:", grouped_avg.head(), sep="\n")

# Pivot table: teams as rows, player origins as columns, mean amount in each cell.
pivot = df.pivot_table(
    index='Team',
    columns='Player Origin',
    values='Amount',
    aggfunc='mean',
)
print("\nPivot Table (Average Amount by Team & Origin):", pivot, sep="\n")



In [None]:
# Plotting: each figure is built through an explicit (fig, ax) pair.

# Histogram of auction amounts in 20 bins.
fig, ax = plt.subplots(figsize=(8, 6))
ax.hist(df['Amount'], bins=20, edgecolor='black')
ax.set_title('Distribution of Auction Amounts')
ax.set_xlabel('Amount')
ax.set_ylabel('Number of Players')
ax.grid(True)
fig.tight_layout()
plt.show()

# Scatter of auction amount against auction year.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(df['Year'], df['Amount'])
ax.set_title('Year vs. Auction Amount')
ax.set_xlabel('Year')
ax.set_ylabel('Amount')
ax.grid(True)
fig.tight_layout()
plt.show()

# Bar chart of how many rows each team has.
team_counts = df['Team'].value_counts()
fig, ax = plt.subplots(figsize=(10, 6))
team_counts.plot.bar(ax=ax, edgecolor='black')
ax.set_title('Number of Players per Team')
ax.set_xlabel('Team')
ax.set_ylabel('Number of Players')
# Tilt the team names and right-align them under their bars.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()


In [None]:
# Mean auction amount for each year, indexed by year.
avg_yearly = df.groupby(by='Year')['Amount'].mean()

# Line chart of the yearly means, with a marker on every data point.
fig, ax = plt.subplots(figsize=(10, 6))
avg_yearly.plot(ax=ax, marker='o')
ax.set_title('Average Auction Amount Over the Years')
ax.set_xlabel('Year')
ax.set_ylabel('Average Amount')
ax.grid(True)
fig.tight_layout()
plt.show()



In [None]:
# Box plot of auction amounts, one box per 'Player Origin' value.
#
# The axes are created up front and handed to pandas: when `by=` is given and
# no `ax` is passed, DataFrame.boxplot opens a figure of its own, which would
# leave a separately created plt.figure() blank and ignore its figsize.
fig, ax = plt.subplots(figsize=(10, 6))
df.boxplot(column='Amount', by='Player Origin', ax=ax)
ax.set_title('Auction Amount by Player Origin')
fig.suptitle('')  # clear the super-title that the grouped boxplot adds automatically
ax.set_xlabel('Player Origin')
ax.set_ylabel('Amount')
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# The six most frequent 'Player Origin' values (value_counts sorts descending).
country_counts = df['Player Origin'].value_counts().iloc[:6]

# Pie chart of those counts, labelled with the origin names and one-decimal percentages.
fig, ax = plt.subplots(figsize=(7, 7))
ax.pie(
    country_counts,
    labels=country_counts.index,
    autopct='%1.1f%%',
    startangle=140,
)
ax.set_title('Top 6 Countries by Number of Players')
fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
# seaborn was used below as `sns` without ever being imported, so this cell
# raised NameError on a fresh kernel (Restart & Run All).
import seaborn as sns

# Load your dataset
df = pd.read_csv("IPLPlayerAuctionData.csv")

# Display column names
print("Columns in the dataset:")
print(df.columns.tolist())

# Scatter of auction amount per team, coloured by the 'Role' column.
# NOTE(review): assumes the CSV has a 'Role' column - confirm against the list printed above.
plt.figure(figsize=(12, 6))
sns.scatterplot(x='Team', y='Amount', data=df, hue='Role', alpha=0.7)
plt.title('Team-wise Auction Amount per Player')
plt.xlabel('Team')
plt.ylabel('Auction Amount')
plt.xticks(rotation=45)
# Anchor the legend outside the axes so it does not cover the points.
plt.legend(title='Role', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# Box plot of the auction-amount distribution for each team.
plt.figure(figsize=(12, 6))
sns.boxplot(x='Team', y='Amount', data=df, palette='pastel')
plt.title('Team-wise Distribution of Auction Amounts')
plt.xlabel('Team')
plt.ylabel('Auction Amount')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
# Violin plot of auction amounts per year; inner='point' draws each observation inside the violin.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(data=df, x='Year', y='Amount', palette='cool', inner='point', ax=ax)
ax.set_title('Auction Amount Distribution Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Auction Amount')
fig.tight_layout()
plt.show()