# Descriptive Statistics & String Methods in Pandas

In [2]:
import pandas as pd

# Creating a sample DataFrame with missing values
data = {
    "Name": ["Hari", "Shyam", None],
    "Age": [12, 14, 18],
    "Subject": ["Math", None, "Anthropology"]
}
df = pd.DataFrame(data)

### Descriptive Statistics in Pandas

In [68]:
# The describe() method gives a statistical summary of numerical columns
print("\nStatistical Summary:")
print(df.describe())

# Count non-null values in each column
print("\nCount of non-null values:")
print(df.count())

# Median of 'Age' column
print("\nMedian of Age column:")
print(df['Age'].median())


Statistical Summary:
             Age
count   3.000000
mean   14.666667
std     3.055050
min    12.000000
25%    13.000000
50%    14.000000
75%    16.000000
max    18.000000

Count of non-null values:
Name       2
Age        3
Subject    2
dtype: int64

Median of Age column:
14.0


In [72]:
# print(df.min())  # Minimum values
# print(df.max())  # Maximum values
# print(df.var())  # Variance
# print(df.std())  # Standard deviation
# print(df.corr())  # Correlation between numeric columns
# print(df.quantile([0.25, 0.75]))  # Quartiles
# print(df.skew())  # Skewness of the data
# print(df.kurtosis())  # Kurtosis

### Function Application in Pandas

In [10]:
# Element-wise Function Application

# The map() method applies a function to each element in a Series

def double(x):
    return x * 2

ages = pd.Series([12, 14, 18])
print("\nDoubled ages:")
print(ages.map(double))


Doubled ages:
0    24
1    28
2    36
dtype: int64


In [12]:
# Row-wise and Column-wise application using apply()
def add_ten(x):
    return x + 10

print("\nAges after adding 10:")
print(df["Age"].apply(add_ten))


Ages after adding 10:
0    22
1    24
2    28
Name: Age, dtype: int64


### Reindexing in Pandas

In [15]:
df_reindex = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
print("\nOriginal DataFrame:")
print(df_reindex)


Original DataFrame:
   A  B
0  1  4
1  2  5
2  3  6


In [17]:
# Reindexing the rows

new_index = [2, 0, 1]
df_reindexed = df_reindex.reindex(new_index)
print("\nReindexed DataFrame:")
print(df_reindexed)


Reindexed DataFrame:
   A  B
2  3  6
0  1  4
1  2  5


### Iteration in Pandas

In [20]:
df_iter = pd.DataFrame({"Name": ["Hari", "Shyam"], "Age": [12, 14], "Subject": ["Math", "Anthropology"]})

# Iterating over column names and their data
print("\nIterating over DataFrame columns:")
for col_name, data in df_iter.items():
    print(f"{col_name}:\n{data}\n")


Iterating over DataFrame columns:
Name:
0     Hari
1    Shyam
Name: Name, dtype: object

Age:
0    12
1    14
Name: Age, dtype: int64

Subject:
0            Math
1    Anthropology
Name: Subject, dtype: object



In [22]:
# Iterating over rows using iterrows()

print("\nIterating over rows:")
for index, row in df_iter.iterrows():
    print(f"Hi {row['Name']}, you are {row['Age']} years old and studying {row['Subject']}.")


Iterating over rows:
Hi Hari, you are 12 years old and studying Math.
Hi Shyam, you are 14 years old and studying Anthropology.


### Sorting in Pandas

In [26]:
df_sort = pd.DataFrame({'Name': ['Ram', 'Sita', 'Gita'], 'Age': [25, 30, 35], 'Marks': [500, 600, 380]})

In [28]:
# Sorting by age in descending order

print("\nSorted by Age:")
print(df_sort.sort_values("Age", ascending=False))


Sorted by Age:
   Name  Age  Marks
2  Gita   35    380
1  Sita   30    600
0   Ram   25    500


In [30]:
# Sorting by multiple columns

print("\nSorted by Age and Marks:")
print(df_sort.sort_values(["Age", "Marks"], ascending=False))


Sorted by Age and Marks:
   Name  Age  Marks
2  Gita   35    380
1  Sita   30    600
0   Ram   25    500


### String Methods in Pandas

In [36]:
df_strings = pd.Series(['Ram', 'sHyam', 'hari', 'Gita', 'rita'])

In [38]:
# Convert all strings to lowercase

print("\nLowercase Strings:")
print(df_strings.str.lower())


Lowercase Strings:
0      ram
1    shyam
2     hari
3     gita
4     rita
dtype: object


In [40]:
# Convert all strings to title case

print("\nTitle Case Strings:")
print(df_strings.str.title())


Title Case Strings:
0      Ram
1    Shyam
2     Hari
3     Gita
4     Rita
dtype: object


In [42]:
# Capitalize the first letter

print("\nCapitalized Strings:")
print(df_strings.str.capitalize())


Capitalized Strings:
0      Ram
1    Shyam
2     Hari
3     Gita
4     Rita
dtype: object


In [44]:
# Swap case of each character

print("\nSwapped Case Strings:")
print(df_strings.str.swapcase())


Swapped Case Strings:
0      rAM
1    ShYAM
2     HARI
3     gITA
4     RITA
dtype: object


In [46]:
# Strip leading and trailing spaces

whitespace_strings = pd.Series(['  Ram   ', '   I am a', 'hari', 'Gita', 'rita   '])
print("\nBefore Strip:")
print(whitespace_strings)
print("\nAfter Strip:")
print(whitespace_strings.str.strip())


Before Strip:
0       Ram   
1       I am a
2         hari
3         Gita
4      rita   
dtype: object

After Strip:
0       Ram
1    I am a
2      hari
3      Gita
4      rita
dtype: object


In [48]:
# Join strings with an underscore

print("\nJoined Strings:")
print(df_strings.str.join('_'))


Joined Strings:
0        R_a_m
1    s_H_y_a_m
2      h_a_r_i
3      G_i_t_a
4      r_i_t_a
dtype: object


In [50]:
# Replace a substring within strings

learning_strings = pd.Series(['deep earning', 'machine learning', 'artificial intelligence'])
print("\nReplacing 'earning' with 'learning':")
print(learning_strings.str.replace('earning', 'learning'))


Replacing 'earning' with 'learning':
0              deep learning
1          machine llearning
2    artificial intelligence
dtype: object


In [52]:
# Check if strings contain a specific substring

print("\nContains 'learning':")
print(learning_strings.str.contains('learning'))


Contains 'learning':
0    False
1     True
2    False
dtype: bool


In [54]:
# Check if strings start with a specific substring

print("\nStarts with 'deep':")
print(learning_strings.str.startswith('deep'))


Starts with 'deep':
0     True
1    False
2    False
dtype: bool


In [56]:
# Check if strings end with a specific substring

print("\nEnds with 'earning':")
print(learning_strings.str.endswith('earning'))


Ends with 'earning':
0     True
1     True
2    False
dtype: bool


In [58]:
# Find the first occurrence of a character

print("\nFirst occurrence of 'e':")
print(learning_strings.str.find('e'))


First occurrence of 'e':
0     1
1     6
2    14
dtype: int64


In [60]:
# Find the last occurrence of a character

print("\nLast occurrence of 'e':")
print(learning_strings.str.rfind('e'))


Last occurrence of 'e':
0     5
1     9
2    22
dtype: int64


In [62]:
# Count occurrences of a substring

print("\nCount of 'e' in each string:")
print(learning_strings.str.count('e'))


Count of 'e' in each string:
0    3
1    2
2    3
dtype: int64


In [64]:
# Get length of each string

print("\nString Lengths:")
print(learning_strings.str.len())


String Lengths:
0    12
1    16
2    23
dtype: int64
