#  Task 12: Introduction to Pandas (Series, DataFrame basics)

In [1]:
import pandas as pd
import numpy as np

## Pandas Series

### 1. Creating a Pandas Series

In [2]:
# Create a Series from a Numpy array
data = [10,20,30,40,50]
serial_data = pd.Series(data)
print("Series data:\n",serial_data)

Series data:
 0    10
1    20
2    30
3    40
4    50
dtype: int64


In [3]:
# Create a Series from a Numpy array
data_arr = np.array([10, 20, 30, 40, 50])
series_arr = pd.Series(data_arr)
print("\nSeries from numpy array:")
print(series_arr)


Series from numpy array:
0    10
1    20
2    30
3    40
4    50
dtype: int32


In [4]:
# Create a Series from a dictionary
data_dict = {'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': 50}
series_dict = pd.Series(data_dict)
print("\nSeries from dictionary:")
print(series_dict)


Series from dictionary:
a    10
b    20
c    30
d    40
e    50
dtype: int64


### 2. Assign a custom index to the Series

In [5]:
series_index = pd.Series([10, 20, 30, 40, 50], index=['a', 'b', 'c', 'd', 'e'])
print("\nSeries with custom index:")
print(series_index)


Series with custom index:
a    10
b    20
c    30
d    40
e    50
dtype: int64


### 3. Perform basic arithmetic operations on Series

In [6]:
print("\nSeries addition:")
print(series_index + 5)
print("\nSeries subtraction:")
print(series_index * 2)
print("\nSeries multiplication:")
print(series_index - 5)
print("\nSeries division:")
print(series_index / 2)


Series addition:
a    15
b    25
c    35
d    45
e    55
dtype: int64

Series subtraction:
a     20
b     40
c     60
d     80
e    100
dtype: int64

Series multiplication:
a     5
b    15
c    25
d    35
e    45
dtype: int64

Series division:
a     5.0
b    10.0
c    15.0
d    20.0
e    25.0
dtype: float64


### 4. Access Elements Using Index Labels and Positions

In [7]:
print("\nAccess element using index label 'c':")
print(series_index['c'])
print("\nAccess element using position 2:")
print(series_index.iloc[2])


Access element using index label 'c':
30

Access element using position 2:
30


### 5. Filter the series to include only values Greater than o Less than

In [8]:
print("\nFilter values greater than 30:")
print(series_index[series_index > 30])
print("\nFilter values less than 30:")
print(series_index[series_index < 30])


Filter values greater than 30:
d    40
e    50
dtype: int64

Filter values less than 30:
a    10
b    20
dtype: int64


## Pandas DataFrame

### 1. Create a DataFrame from a Dictionary of Lists

In [9]:
data_dict = {
    'Name': ['Ali', 'Haider', 'Asad', 'Ahtasham'],
    'Age': [19, 21, 20, 28],
    'City': ['Rwp', 'Isl', 'Lahore', 'Karachi']
}
df = pd.DataFrame(data_dict)
print("\nDataFrame from dictionary of lists:")
print(df)


DataFrame from dictionary of lists:
       Name  Age     City
0       Ali   19      Rwp
1    Haider   21      Isl
2      Asad   20   Lahore
3  Ahtasham   28  Karachi


### 2. Create a DataFrame from a NumPy Array

In [10]:
data_array = np.array([
    [20, 'RWP'],
    [21, 'ISL'],
    [21, 'LHR'],
    [22, 'KAR']
])
df = pd.DataFrame(data_array, columns=['Age', 'City'], index=['Ali', 'Haider', 'Asad', 'Ahtasham'])
print("\nDataFrame from numpy array:")
print(df)


DataFrame from numpy array:
         Age City
Ali       20  RWP
Haider    21  ISL
Asad      21  LHR
Ahtasham  22  KAR


### 3. Load a DataFrame from a CSV File

In [11]:
df = pd.read_csv("data.csv")
print(df)

   Time        V1        V2        V3        V4
0     0 -1.359807 -0.072781  2.536347  1.378155
1     0  1.191857  0.266151  0.166480  0.448154
2     1 -1.358354 -1.340163  1.773209  0.379780
3     1 -0.966272 -0.185226  1.792993 -0.863291
4     2 -1.158233  0.877737  1.548718  0.403034


### 4. Display the First and Last Five Rows of the DataFrame

In [12]:
print("First 3 rows of data:\n",df.head(3))
print("\nlast 2 rows of data:\n",df.tail(2))

First 3 rows of data:
    Time        V1        V2        V3        V4
0     0 -1.359807 -0.072781  2.536347  1.378155
1     0  1.191857  0.266151  0.166480  0.448154
2     1 -1.358354 -1.340163  1.773209  0.379780

last 2 rows of data:
    Time        V1        V2        V3        V4
3     1 -0.966272 -0.185226  1.792993 -0.863291
4     2 -1.158233  0.877737  1.548718  0.403034


### 5. Get a Summary of the DataFrame

In [13]:
print("Summary of dataframe:\n",df.describe())
print("\nMean of dataframe:\n",df.mean())
print("\nMedian of dataframe:\n",df.median())
print("\nMode of dataframe:\n",df.mode())

Summary of dataframe:
           Time        V1        V2        V3        V4
count  5.00000  5.000000  5.000000  5.000000  5.000000
mean   0.80000 -0.730162 -0.090857  1.563549  0.349166
std    0.83666  1.086765  0.811731  0.865297  0.797262
min    0.00000 -1.359807 -1.340163  0.166480 -0.863291
25%    0.00000 -1.358354 -0.185226  1.548718  0.379780
50%    1.00000 -1.158233 -0.072781  1.773209  0.403034
75%    1.00000 -0.966272  0.266151  1.792993  0.448154
max    2.00000  1.191857  0.877737  2.536347  1.378155

Mean of dataframe:
 Time    0.800000
V1     -0.730162
V2     -0.090857
V3      1.563549
V4      0.349166
dtype: float64

Median of dataframe:
 Time    1.000000
V1     -1.158233
V2     -0.072781
V3      1.773209
V4      0.403034
dtype: float64

Mode of dataframe:
    Time        V1        V2        V3        V4
0   0.0 -1.359807 -1.340163  0.166480 -0.863291
1   1.0 -1.358354 -0.185226  1.548718  0.379780
2   NaN -1.158233 -0.072781  1.773209  0.403034
3   NaN -0.966272  0.2661

### 6. Extract a Specific Column as a Series

In [14]:
print("Extract a Column 'Time' from dataframe:\n",df['Time'])

Extract a Column 'Time' from dataframe:
 0    0
1    0
2    1
3    1
4    2
Name: Time, dtype: int64


### 7. Filter Rows Based on Column Values

In [15]:
print("Value Greater than zero:\n",df[df['Time'] > 0])

Value Greater than zero:
    Time        V1        V2        V3        V4
2     1 -1.358354 -1.340163  1.773209  0.379780
3     1 -0.966272 -0.185226  1.792993 -0.863291
4     2 -1.158233  0.877737  1.548718  0.403034


### 8. Select Rows Based on Multiple Conditions

In [16]:
print("Value less than zero of V1 and greater than zero of V2:\n",df[(df['V1'] < 0) & (df['V2'] > 0)])

Value less than zero of V1 and greater than zero of V2:
    Time        V1        V2        V3        V4
4     2 -1.158233  0.877737  1.548718  0.403034


### 9. Add a New Column to the DataFrame

In [17]:
df['V5'] = [1,2,3,4,5]
print("\nDataFrame after addition column 'V5':")
print(df)


DataFrame after addition column 'V5':
   Time        V1        V2        V3        V4  V5
0     0 -1.359807 -0.072781  2.536347  1.378155   1
1     0  1.191857  0.266151  0.166480  0.448154   2
2     1 -1.358354 -1.340163  1.773209  0.379780   3
3     1 -0.966272 -0.185226  1.792993 -0.863291   4
4     2 -1.158233  0.877737  1.548718  0.403034   5


### 10. Delete a column from the DataFrame


In [18]:
df = df.drop(columns=['V5'])
print("\nDataFrame after deleting column 'V5':")
print(df)


DataFrame after deleting column 'V5':
   Time        V1        V2        V3        V4
0     0 -1.359807 -0.072781  2.536347  1.378155
1     0  1.191857  0.266151  0.166480  0.448154
2     1 -1.358354 -1.340163  1.773209  0.379780
3     1 -0.966272 -0.185226  1.792993 -0.863291
4     2 -1.158233  0.877737  1.548718  0.403034


### 11. Rename Columns in the DataFrame

In [19]:
df = df.rename(columns={'Time' : 'Value'})
print("\nDataFrame after renaming column 'Time':")
print(df)


DataFrame after renaming column 'Time':
   Value        V1        V2        V3        V4
0      0 -1.359807 -0.072781  2.536347  1.378155
1      0  1.191857  0.266151  0.166480  0.448154
2      1 -1.358354 -1.340163  1.773209  0.379780
3      1 -0.966272 -0.185226  1.792993 -0.863291
4      2 -1.158233  0.877737  1.548718  0.403034


# Mini Project: Employee Data Analysis

### 1. Create a DataFrame with employee data

In [20]:
employee_data = {
    'Name': ['Ali', 'Ahmed', 'Asad', 'Aslam', 'Ahtasham'],
    'Age': [25, 30, 35, 40, 45],
    'Department': ['HR', 'Manufacturing', 'Marketing', 'Business', 'Designing'],
    'Salary': [20000, 10000, 40000, 60000, 90000],
    'Years of Experience': [2, 5, 8, 10, 6]
}
df = pd.DataFrame(employee_data)
print("Employee DataFrame:")
print(df)

Employee DataFrame:
       Name  Age     Department  Salary  Years of Experience
0       Ali   25             HR   20000                    2
1     Ahmed   30  Manufacturing   10000                    5
2      Asad   35      Marketing   40000                    8
3     Aslam   40       Business   60000                   10
4  Ahtasham   45      Designing   90000                    6


### 2. Display Basic Information

In [21]:
print("\nFirst few rows of the DataFrame:")
print(df.head())

print("\nLast few rows of the DataFrame:")
print(df.tail())

print("\nSummary of the DataFrame:")
print(df.describe())


First few rows of the DataFrame:
       Name  Age     Department  Salary  Years of Experience
0       Ali   25             HR   20000                    2
1     Ahmed   30  Manufacturing   10000                    5
2      Asad   35      Marketing   40000                    8
3     Aslam   40       Business   60000                   10
4  Ahtasham   45      Designing   90000                    6

Last few rows of the DataFrame:
       Name  Age     Department  Salary  Years of Experience
0       Ali   25             HR   20000                    2
1     Ahmed   30  Manufacturing   10000                    5
2      Asad   35      Marketing   40000                    8
3     Aslam   40       Business   60000                   10
4  Ahtasham   45      Designing   90000                    6

Summary of the DataFrame:
             Age        Salary  Years of Experience
count   5.000000      5.000000              5.00000
mean   35.000000  44000.000000              6.20000
std     7.905694  

### 3. Data Selection and Filtering

In [22]:
print("\nNames of the employees:")
print(df['Name'])

high_salary = df[df['Salary'] > 40000]
print("\nEmployees with salary greater than 40000:")
print(high_salary)

eng_emp = df[(df['Department'] == 'Engineering') & (df['Years of Experience'] > 5)]
print("\nEngineering employees with more than 5 years of experience:")
print(eng_emp)


Names of the employees:
0         Ali
1       Ahmed
2        Asad
3       Aslam
4    Ahtasham
Name: Name, dtype: object

Employees with salary greater than 40000:
       Name  Age Department  Salary  Years of Experience
3     Aslam   40   Business   60000                   10
4  Ahtasham   45  Designing   90000                    6

Engineering employees with more than 5 years of experience:
Empty DataFrame
Columns: [Name, Age, Department, Salary, Years of Experience]
Index: []


### 4. Data Manipulation

In [23]:
df['Bonus'] = df['Salary'] * 0.2
print("\nDataFrame after adding 'Bonus' column:")
print(df)

df = df.rename(columns={'Years of Experience': 'Experience'})
print("\nDataFrame after renaming column:")
print(df)

df = df.drop(columns=['Bonus'])
print("\nDataFrame after deleting 'Bonus' column:")
print(df)


DataFrame after adding 'Bonus' column:
       Name  Age     Department  Salary  Years of Experience    Bonus
0       Ali   25             HR   20000                    2   4000.0
1     Ahmed   30  Manufacturing   10000                    5   2000.0
2      Asad   35      Marketing   40000                    8   8000.0
3     Aslam   40       Business   60000                   10  12000.0
4  Ahtasham   45      Designing   90000                    6  18000.0

DataFrame after renaming column:
       Name  Age     Department  Salary  Experience    Bonus
0       Ali   25             HR   20000           2   4000.0
1     Ahmed   30  Manufacturing   10000           5   2000.0
2      Asad   35      Marketing   40000           8   8000.0
3     Aslam   40       Business   60000          10  12000.0
4  Ahtasham   45      Designing   90000           6  18000.0

DataFrame after deleting 'Bonus' column:
       Name  Age     Department  Salary  Experience
0       Ali   25             HR   20000       