In [1]:
# Import the Pandas Library
import pandas as pd
import numpy as np

# Create IPL DataFrame
ipl_data = {
    'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings', 'kings', 'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],
    'Rank': [1, 2, 2, 3, 3, 4, 1, 1, 2, 4, 1, 2],
    'Year': [2014, 2015, 2014, 2015, 2014, 2015, 2016, 2017, 2016, 2014, 2015, 2017],
    'Points': [876, 789, 863, 673, 741, 812, 756, 788, 694, 701, 804, 690]
}
df = pd.DataFrame(ipl_data)

# Display the DataFrame
print("### IPL Data ###")
print(df)

# Split Data into Groups
grouped_by_team = df.groupby('Team')
print("\n### Grouped by 'Team' ###")
print(grouped_by_team)

# View Groups
print("\n### Group Keys and Indices ###")
print(grouped_by_team.groups)

# Grouping by Multiple Columns
grouped_by_team_year = df.groupby(['Team', 'Year'])
print("\n### Groups by 'Team' and 'Year' ###")
print(grouped_by_team_year.groups)

# Iterating Through Groups
print("\n### Iterating Through Groups by 'Year' ###")
grouped_by_year = df.groupby('Year')
for year, group in grouped_by_year:
    print(f"\nYear: {year}")
    print(group)

# Select a Specific Group
print("\n### Data for Year 2014 ###")
print(grouped_by_year.get_group(2014))

# Aggregation
print("\n### Mean Points by 'Year' ###")
print(grouped_by_year['Points'].agg(np.mean))

# Applying Multiple Aggregation Functions
print("\n### Aggregation Functions (Sum, Mean, Std) by 'Team' ###")
print(grouped_by_team['Points'].agg([np.sum, np.mean, np.std]))

# Transformation
print("\n### Standardized Points by 'Team' ###")
score = lambda x: (x - x.mean()) / x.std() * 10
print(grouped_by_team.transform(score))

# Filtration
print("\n### Teams with 3 or More Records ###")
filtered_data = df.groupby('Team').filter(lambda x: len(x) >= 3)
print(filtered_data)


### IPL Data ###
      Team  Rank  Year  Points
0   Riders     1  2014     876
1   Riders     2  2015     789
2   Devils     2  2014     863
3   Devils     3  2015     673
4    Kings     3  2014     741
5    kings     4  2015     812
6    Kings     1  2016     756
7    Kings     1  2017     788
8   Riders     2  2016     694
9   Royals     4  2014     701
10  Royals     1  2015     804
11  Riders     2  2017     690

### Grouped by 'Team' ###
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000261474F5760>

### Group Keys and Indices ###
{'Devils': [2, 3], 'Kings': [4, 6, 7], 'Riders': [0, 1, 8, 11], 'Royals': [9, 10], 'kings': [5]}

### Groups by 'Team' and 'Year' ###
{('Devils', 2014): [2], ('Devils', 2015): [3], ('Kings', 2014): [4], ('Kings', 2016): [6], ('Kings', 2017): [7], ('Riders', 2014): [0], ('Riders', 2015): [1], ('Riders', 2016): [8], ('Riders', 2017): [11], ('Royals', 2014): [9], ('Royals', 2015): [10], ('kings', 2015): [5]}

### Iterating Through Groups by 'Ye

  print(grouped_by_year['Points'].agg(np.mean))
  print(grouped_by_team['Points'].agg([np.sum, np.mean, np.std]))
  print(grouped_by_team['Points'].agg([np.sum, np.mean, np.std]))
  print(grouped_by_team['Points'].agg([np.sum, np.mean, np.std]))


Explanation of Each Section:


1-Creating the DataFrame: Initializes the dataset for IPL teams, ranks, years, and points.


2-Split Data into Groups: Groups data by 'Team' using groupby.


3-View Groups: Displays group keys and corresponding indices in the DataFrame.


4-Grouping by Multiple Columns: Groups data by 'Team' and 'Year'.


5-Iterating Through Groups: Iterates through groups based on the 'Year' column.


6-Select a Specific Group: Retrieves the group for the year 2014 using get_group.


7-Aggregation: Calculates the mean points for each year.


8-Multiple Aggregation Functions: Applies sum, mean, and std functions to 'Points' grouped by 'Team'.


9-Transformation: Standardizes the 'Points' column for each group.


10-Filtration: Filters groups with 3 or more records.


11-Grouping by Multiple Columns with Multiple Functions: Groups data by 'Team' and 'Year
'''