# Task 1: Introduction to Pandas


## Task 1.1: Installation and Import


1. Install the pandas library using pip if not already installed.

In [1]:
pip install pandas



2. Import pandas and check its version.

In [2]:
import pandas as pd
pd.__version__

'2.2.2'

## Task 1.2: Series Creation


1. Create a Pandas Series from a Python list containing integers.


In [4]:
pd.Series([1, 2, 3, 4, 5])

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5


2. Create a Pandas Series with custom indices from a dictionary.

In [5]:
pd.Series({'a': 1, 'b': 2, 'c': 3})

Unnamed: 0,0
a,1
b,2
c,3


3. Create a Series of 10 random numbers using the `numpy.random` module.


In [7]:
import numpy as np
pd.Series(np.random.rand(11))

Unnamed: 0,0
0,0.654914
1,0.16267
2,0.421126
3,0.055934
4,0.684377
5,0.648996
6,0.346262
7,0.781177
8,0.852241
9,0.552218


# Task 2: Working with DataFrames

## Task 2.1: DataFrame Creation


1. Create a DataFrame from a dictionary containing lists of equal length.


In [9]:
pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


2. Generate a DataFrame from a list of dictionaries with mixed datatypes.


In [10]:
pd.DataFrame([{'a': 1, 'b': 2}, {'a': 3, 'b': 4}])

Unnamed: 0,a,b
0,1,2
1,3,4


3. Load a CSV file (create a small sample if unavailable) into a DataFrame and print the first 5 rows.


In [15]:
import pandas as pd
sample = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
sample.to_csv('sample.csv', index = False)
pd.read_csv('sample.csv').head()

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


## Task 2.2: Exploring Data


1. Print the shape, columns, and index of the DataFrame.


In [16]:
df = pd.read_csv('sample.csv')
print(df.shape)
print(df.columns)
print(df.index)

(3, 2)
Index(['a', 'b'], dtype='object')
RangeIndex(start=0, stop=3, step=1)


2. Use `.info()` and `.describe()` to explore the data.


In [17]:
df = pd.read_csv('sample.csv')
print(df.info())
print(df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   a       3 non-null      int64
 1   b       3 non-null      int64
dtypes: int64(2)
memory usage: 176.0 bytes
None
         a    b
count  3.0  3.0
mean   2.0  5.0
std    1.0  1.0
min    1.0  4.0
25%    1.5  4.5
50%    2.0  5.0
75%    2.5  5.5
max    3.0  6.0


3. Display specific rows and columns using `iloc` and `loc`.


In [24]:
df = pd.read_csv('sample.csv')
print(df.iloc[0])
print(df.loc[0])

a    1
b    4
Name: 0, dtype: int64
a    1
b    4
Name: 0, dtype: int64


# Task 3: DataFrame Operations


## Task 3.1: Basic Operations


1. Add a new column to the DataFrame based on a mathematical operation on existing columns.


In [25]:
df = pd.read_csv('sample.csv')
df['c'] = df['a'] + df['b']
df

Unnamed: 0,a,b,c
0,1,4,5
1,2,5,7
2,3,6,9


2. Rename columns in the DataFrame.


In [26]:
df = pd.read_csv('sample.csv')
df.rename(columns={'a': 'A', 'b': 'B'})

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


3. Drop a specific row and column from the DataFrame.


In [27]:
df = pd.read_csv('sample.csv')
df.drop(0)

Unnamed: 0,a,b
1,2,5
2,3,6


4. Create a DataFrame using a Python dictionary.

In [28]:
df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
df

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


# Task 4: Save and Export


1. Save the DataFrame to a CSV file.

In [29]:
pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}).to_csv('sample.csv', index = False)

2. Save the DataFrame to an Excel file with a specific sheet name.


In [31]:
df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
df.to_excel('sample.xlsx', sheet_name='Sheet1')