# Pandas DataFrame and Series

Pandas is a powerful data manipulation library for Python, widely used for data analysis and data cleaning. It provides two primary data structures: Series and DataFrame. A Series is a one-dimensional array-like object, while a DataFrame is a two-dimensional, size-mutable, and potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [None]:
import pandas as pd

# A Series is a one-dimensional labeled array capable of holding any data type.
# Built from a plain list, it receives the default integer index 0..n-1.
data = list(range(1, 6))
series = pd.Series(data=data)

print("Series: \n", series)

Series: 
 0    1
1    2
2    3
3    4
4    5
dtype: int64


In [3]:
## Create a Series from a dictionary
# The dictionary keys become the index labels and the values become the data.
data = dict(a=10, b=20, c=30, d=40)
series_dict = pd.Series(data=data)

print("\nSeries from dictionary: \n", series_dict)


Series from dictionary: 
 a    10
b    20
c    30
d    40
dtype: int64


In [4]:
# Create a Series with explicit index labels instead of the default integers.
data = [10, 20, 30, 40, 50]
index = list('abcde')

# Last expression of the cell, so the notebook displays the resulting Series.
pd.Series(data=data, index=index)

a    10
b    20
c    30
d    40
e    50
dtype: int64

In [None]:
# A DataFrame is a two-dimensional labeled data structure whose columns may
# have different types.

# Create a DataFrame from a dictionary: each key becomes a column name and
# each list holds that column's values.
data = dict(
    Name=['Nayem', 'Nobita', 'Jack'],
    Age=[23, 25, 22],
    City=['Dhaka', 'Tokyo', 'New York'],
)
df = pd.DataFrame(data=data)

print("\nDataFrame from dictionary: \n", df)
print(type(df))




DataFrame from dictionary: 
      Name  Age      City
0   Nayem   23     Dhaka
1  Nobita   25     Tokyo
2    Jack   22  New York
<class 'pandas.core.frame.DataFrame'>


In [6]:
import numpy as np

# Convert the DataFrame into a 2-D NumPy array (one inner array per row).
# The columns mix strings and integers, so the displayed array has dtype=object.
np.array(df)

array([['Nayem', 23, 'Dhaka'],
       ['Nobita', 25, 'Tokyo'],
       ['Jack', 22, 'New York']], dtype=object)

In [None]:
## Create a DataFrame from a list of dictionaries
# Each dictionary describes one row; its keys become the column names.
data = [
    dict(Name='Nayem', Age=23, City='Dhaka'),
    dict(Name='Nobita', Age=25, City='Tokyo'),
    dict(Name='Jack', Age=22, City='New York'),
]
df = pd.DataFrame(data=data)

print("\nDataFrame from list of dictionaries: \n", df)
print(type(df))




DataFrame from list of dictionaries: 
      Name  Age      City
0   Nayem   23     Dhaka
1  Nobita   25     Tokyo
2    Jack   22  New York
<class 'pandas.core.frame.DataFrame'>


In [10]:
# Load the CSV file into a DataFrame. The path is relative, so 'data.csv' is
# resolved against the notebook's working directory. This rebinds `df`.
df = pd.read_csv('data.csv')

# Preview the first 10 rows.
df.head(n=10)

Unnamed: 0,Date,Category,Value,Product,Sales,Region
0,2023-01-01,A,28.0,Product1,754.0,East
1,2023-01-02,B,39.0,Product3,110.0,North
2,2023-01-03,C,32.0,Product2,398.0,East
3,2023-01-04,B,8.0,Product1,522.0,East
4,2023-01-05,B,26.0,Product3,869.0,North
5,2023-01-06,B,54.0,Product3,192.0,West
6,2023-01-07,A,16.0,Product1,936.0,East
7,2023-01-08,C,89.0,Product1,488.0,West
8,2023-01-09,C,37.0,Product3,772.0,West
9,2023-01-10,A,22.0,Product2,834.0,West


In [11]:
# Preview the last 5 rows of the DataFrame.
df.tail(n=5)

Unnamed: 0,Date,Category,Value,Product,Sales,Region
45,2023-02-15,B,99.0,Product2,599.0,West
46,2023-02-16,B,6.0,Product1,938.0,South
47,2023-02-17,B,69.0,Product3,143.0,West
48,2023-02-18,C,65.0,Product3,182.0,North
49,2023-02-19,C,11.0,Product3,708.0,North


In [13]:
## Accessing data from a DataFrame

# Selecting a single column by name yields a Series; show its last 5 values.
df["Date"].tail(n=5)

45    2023-02-15
46    2023-02-16
47    2023-02-17
48    2023-02-18
49    2023-02-19
Name: Date, dtype: object

In [14]:
# .loc selects by index label: this returns the row labelled 3 as a Series
# whose index is the column names.
df.loc[3]

Date        2023-01-04
Category             B
Value              8.0
Product       Product1
Sales            522.0
Region            East
Name: 3, dtype: object

In [15]:
# .iloc selects by integer position: this returns the row at position 3
# (the fourth row) as a Series whose index is the column names.
df.iloc[3]

Date        2023-01-04
Category             B
Value              8.0
Product       Product1
Sales            522.0
Region            East
Name: 3, dtype: object

In [17]:
# Read the value at row position 0, column position 0 in a single indexing step.
# The chained form `df.iloc[0][0]` first builds a row Series labelled by column
# name and then indexes it with an integer key; pandas treats that as a
# deprecated positional fallback and emits a FutureWarning.
df.iloc[0, 0]

  df.iloc[0][0]


'2023-01-01'

In [18]:
## Accessing a specified element

# .at reads a single scalar by row label and column label.
df.at[1, 'Date']

'2023-01-02'

In [23]:
# .iat reads a single scalar by integer position: row position 10, column position 3.
df.iat[10,3]

'Product1'

In [None]:
# Data manipulation with DataFrame