In [20]:
# Data Loading & Familiarization: Load the Dataset

# importing pandas  
import pandas as pd 
	
# Load the dataset from the URL
url = "https://media.geeksforgeeks.org/wp-content/uploads/nba.csv"
df = pd.read_csv(url)

df.head()


Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [21]:
# Data Loading & Familiarization: Inspect the Structure

# Print column names
print("Column Names:")
print(df.columns.tolist())

# Check data types
print("\n Data Types:")
print(df.dtypes)

# Check for null values
print("\n Null Values:")
print(df.isnull().sum())


🧾 Column Names:
['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight', 'College', 'Salary']

🔍 Data Types:
Name         object
Team         object
Number      float64
Position     object
Age         float64
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

⚠️ Null Values:
Name         1
Team         1
Number       1
Position     1
Age          1
Height       1
Weight       1
College     85
Salary      12
dtype: int64


In [22]:
# Data Loading & Familiarization: Pure Function to Describe Dataset Info

def get_dataset_info(dataframe):
    return {
        "num_rows": dataframe.shape[0],
        "num_columns": dataframe.shape[1],
        "column_names": dataframe.columns.tolist()
    }

# Usage
info = get_dataset_info(df)
print("\n Dataset Info:")
print(info)



 Dataset Info:
{'num_rows': 458, 'num_columns': 9, 'column_names': ['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight', 'College', 'Salary']}


In [37]:
# Functional Cleaning & Preprocessing:  Define a function that returns a cleaned copy
def clean_nba_data(data):
    # Drop rows where Name or Team is blank/NaN
    cleaned = data.dropna(subset=['Name', 'Team']).copy()
    
    # Functional Cleaning & Preprocessing : Strip whitespace and lowercase those columns
    for col in ['Name', 'Team']:
        cleaned[col] = cleaned[col].str.strip().str.lower()
    
    return cleaned

# Functional Cleaning & Preprocessing:  Get your cleaned DataFrame
clean_df = clean_nba_data(df)

print("Original rows:", len(df))
print("Cleaned rows: ", len(clean_df))
print(clean_df.head())

Original rows: 458
Cleaned rows:  457
            Name            Team  Number Position   Age Height  Weight  \
0  avery bradley  boston celtics     0.0       PG  25.0    6-2   180.0   
1    jae crowder  boston celtics    99.0       SF  25.0    6-6   235.0   
2   john holland  boston celtics    30.0       SG  27.0    6-5   205.0   
3    r.j. hunter  boston celtics    28.0       SG  22.0    6-5   185.0   
4  jonas jerebko  boston celtics     8.0       PF  29.0   6-10   231.0   

             College     Salary  
0              Texas  7730337.0  
1          Marquette  6796117.0  
2  Boston University        NaN  
3      Georgia State  1148640.0  
4                NaN  5000000.0  


In [38]:
# Code Refactoring with FP Principles
ages = [15, 22, 37, 40, 51, 60]

# Procedural approach
result = []
for age in ages:
    if age % 2 == 0:
        doubled = age * 2
        result.append(doubled)

print(result)  

[44, 80, 120]


In [39]:
# Functional approach
even_ages = filter(lambda x: x % 2 == 0, ages)
doubled  = map(lambda x: x * 2, even_ages)
result   = list(doubled)

print(result)  

[44, 80, 120]


In [None]:
# Functional approach Version Is Better than Procedural approach beacuse 

# Modularity: filter and map each do one job—you can swap or reuse them without touching the rest.
# Readability: You read a clear pipeline (filter → map → collect) instead of tracking loops and accumulators.
# Immutability: Data isn’t changed in place; you always produce a fresh result, so the original stays safe.
# Composability: You can insert, remove, or reorder steps (e.g. another filter or sorted) without rewriting existing logic.



In [40]:
# Clean the data
df_clean = df.dropna()

# Functional style summary
player_count = df_clean['Name'].nunique()
average_salary = df_clean['Salary'].mean()
team_salary_avg = df_clean.groupby('Team')['Salary'].mean().sort_values(ascending=False)

# Display mini analysis
print("Total unique players:", player_count)
print("Average salary:", round(average_salary, 2))
print("Top 5 teams by average salary:\n", team_salary_avg.head())

Total unique players: 364
Average salary: 4620311.07
Top 5 teams by average salary:
 Team
Cleveland Cavaliers      6.733472e+06
Golden State Warriors    6.711293e+06
Los Angeles Clippers     6.707637e+06
Miami Heat               6.583812e+06
Chicago Bulls            6.105483e+06
Name: Salary, dtype: float64
