### Introduction to Pandas (Series, DataFrame basics)


In [4]:
import pandas as pd
import numpy as np

In [5]:
# Creating a Series from a list,array,dictionary
list1 = [10, 20, 30, 40, 50]
series = pd.Series(list1)
print(series)

array = np.array([10, 20, 30, 40, 50])
series = pd.Series(array)
print(series)

data_dict = {'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': 50}
series = pd.Series(data_dict)
print(series)

0    10
1    20
2    30
3    40
4    50
dtype: int64
0    10
1    20
2    30
3    40
4    50
dtype: int32
a    10
b    20
c    30
d    40
e    50
dtype: int64


In [6]:
custom_index = pd.Series(list1, index=['A', 'B', 'C', 'D', 'E'])
print(custom_index)

A    10
B    20
C    30
D    40
E    50
dtype: int64


In [7]:
# Arithmetic operations
series1 = pd.Series([1, 2, 3])
series2 = pd.Series([4, 5, 6])

# Addition
print(series1 + series2)

# Subtraction
print(series1 - series2)

# Multiplication
print(series1 * series2)

# Division
print(series1 / series2)


0    5
1    7
2    9
dtype: int64
0   -3
1   -3
2   -3
dtype: int64
0     4
1    10
2    18
dtype: int64
0    0.25
1    0.40
2    0.50
dtype: float64


In [8]:
#using index labels
print(custom_index['B'])

#using position
print(custom_index[1])


20
20


In [9]:
#threshold = 30 here
filtered_series = custom_index[custom_index > 30]
print(filtered_series)

D    40
E    50
dtype: int64


#### Create DataFrames

In [10]:
data = {'Name': ['Amna', 'Hareem', 'Noreen', 'Faiza'],
    'Age': [20, 21, 21, 20],
    'City': ['Islamabad', 'Sargodha', 'KhanPur', 'Kabul']}
df_from_dict = pd.DataFrame(data)
print(df_from_dict)

     Name  Age       City
0    Amna   20  Islamabad
1  Hareem   21   Sargodha
2  Noreen   21    KhanPur
3   Faiza   20      Kabul


In [11]:
# numpy array
data_array = np.array([[20, 'Islamabad'], [21, 'Sargodha'], [21, 'KhanPur'], [20, 'Kabul']])
df_from_array = pd.DataFrame(data_array, columns=['Age', 'City'], index=['Amna', 'Hareem', 'Noreen', 'Kabul'])
print(df_from_array)

       Age       City
Amna    20  Islamabad
Hareem  21   Sargodha
Noreen  21    KhanPur
Kabul   20      Kabul


In [31]:
# Sample data
data = {
    'ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Name': ['Amna', 'Bushra', 'kareem', 'Daood', 'Elina', 'Farah', 'Gull', 'Hamna', 'Irum', 'Javeria'],
    'Age': [23, 27, 22, 25, 28, 24, 29, 26, 21, 23],
    'Score': [85, 90, 70, 88, 95, 80, 92, 78, 74, 84],
    'City': ['Islamabad', 'Lahore', 'Chicha watni', 'Haiderabad', 'Peshawar', 'Peru', 'Sharda', 'Shimla', 'Dam', 'Sialkot']
}

# Creating DataFrame
df = pd.DataFrame(data)

# Saving to CSV
csv_file_path = 'C:/Users/Abid Khan/Desktop/BWF_DS/Task 12/dataset.csv'
df.to_csv(csv_file_path, index=False)

# Loading the DataFrame from the CSV file
df_loaded = pd.read_csv(csv_file_path)
print(df_loaded)

   ID     Name  Age  Score          City
0   1     Amna   23     85     Islamabad
1   2   Bushra   27     90        Lahore
2   3   kareem   22     70  Chicha watni
3   4    Daood   25     88    Haiderabad
4   5    Elina   28     95      Peshawar
5   6    Farah   24     80          Peru
6   7     Gull   29     92        Sharda
7   8    Hamna   26     78        Shimla
8   9     Irum   21     74           Dam
9  10  Javeria   23     84       Sialkot


In [35]:
# Display the first five rows
print(df_loaded.head(1))
print("\n\n")
# Display the last five rows
print(df_loaded.tail(1))

   ID  Name  Age  Score       City
0   1  Amna   23     85  Islamabad



   ID     Name  Age  Score     City
9  10  Javeria   23     84  Sialkot


In [38]:
summary = df_loaded.describe()
summary.loc['median'] = df_loaded.median(numeric_only=True)
print("\nSummary of the DataFrame:")
print(summary)



Summary of the DataFrame:
              ID       Age      Score
count   10.00000  10.00000  10.000000
mean     5.50000  24.80000  83.600000
std      3.02765   2.65832   8.058122
min      1.00000  21.00000  70.000000
25%      3.25000  23.00000  78.500000
50%      5.50000  24.50000  84.500000
75%      7.75000  26.75000  89.500000
max     10.00000  29.00000  95.000000
median   5.50000  24.50000  84.500000


In [42]:
# Extracting a column as a Series
age_series = df_loaded['Age']
print("\nAge column as a Series:")
print(age_series)


# Filtering rows 
filtered_df = df_loaded[df_loaded['Age'] > 25]
print("\nRows where Age > 25:")
print(filtered_df)

# Both
filtered_df_multiple_conditions = df_loaded[(df_loaded['Age'] > 25) & (df_loaded['Score'] > 80)]
print("\nRows where Age > 25 and Score > 80:")
print(filtered_df_multiple_conditions)


Age column as a Series:
0    23
1    27
2    22
3    25
4    28
5    24
6    29
7    26
8    21
9    23
Name: Age, dtype: int64

Rows where Age > 25:
   ID    Name  Age  Score      City
1   2  Bushra   27     90    Lahore
4   5   Elina   28     95  Peshawar
6   7    Gull   29     92    Sharda
7   8   Hamna   26     78    Shimla

Rows where Age > 25 and Score > 80:
   ID    Name  Age  Score      City
1   2  Bushra   27     90    Lahore
4   5   Elina   28     95  Peshawar
6   7    Gull   29     92    Sharda


In [44]:
df_loaded['Pass'] = df_loaded['Score'] >= 75
print("\nDataFrame after adding 'Pass' column:")
print(df_loaded)

df_loaded.drop('City', axis=1, inplace=True)
print("\nDataFrame after deleting 'City' column:")
print(df_loaded)


DataFrame after adding 'Pass' column:
   ID     Name  Age  Score          City   Pass
0   1     Amna   23     85     Islamabad   True
1   2   Bushra   27     90        Lahore   True
2   3   kareem   22     70  Chicha watni  False
3   4    Daood   25     88    Haiderabad   True
4   5    Elina   28     95      Peshawar   True
5   6    Farah   24     80          Peru   True
6   7     Gull   29     92        Sharda   True
7   8    Hamna   26     78        Shimla   True
8   9     Irum   21     74           Dam  False
9  10  Javeria   23     84       Sialkot   True

DataFrame after deleting 'City' column:
   ID     Name  Age  Score   Pass
0   1     Amna   23     85   True
1   2   Bushra   27     90   True
2   3   kareem   22     70  False
3   4    Daood   25     88   True
4   5    Elina   28     95   True
5   6    Farah   24     80   True
6   7     Gull   29     92   True
7   8    Hamna   26     78   True
8   9     Irum   21     74  False
9  10  Javeria   23     84   True


In [48]:
# Renaming columns in the DataFrame
df_loaded.rename(columns={'Name': 'First Name', 'Age': 'Years', 'Score': 'TestScore'}, inplace=True)
print("\nDataFrame after renaming columns:")
print(df_loaded)


DataFrame after renaming columns:
   ID FullName  Years  TestScore   Pass
0   1     Amna     23         85   True
1   2   Bushra     27         90   True
2   3   kareem     22         70  False
3   4    Daood     25         88   True
4   5    Elina     28         95   True
5   6    Farah     24         80   True
6   7     Gull     29         92   True
7   8    Hamna     26         78   True
8   9     Irum     21         74  False
9  10  Javeria     23         84   True
