In [2]:
!pip install pandas
!pip install numpy

Collecting pandas
  Using cached pandas-2.2.3-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting numpy>=1.26.0 (from pandas)
  Downloading numpy-2.2.5-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.2.3-cp313-cp313-win_amd64.whl (11.5 MB)
Downloading numpy-2.2.5-cp313-cp313-win_amd64.whl (12.6 MB)
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---- ----------------------------------- 1.3/12.6 MB 6.7 MB/s eta 0:00:02
   -------- ------------------------------- 2.6/12.6 MB 6.8 MB/s eta 0:00:02
   ------------- -------------------------- 4.2/12.6 MB 6.7 MB/s eta 0:00:02
   ----------------- ---------------------- 5.5/12.6 MB 6.7 MB/s eta 0:00:02
   --------------------- ------------------ 6.8/12.6 MB 6.7 MB/s eta 0:00:01
   --------------


[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
import numpy as np

#### 1. Creating a Pandas Series
- A Series is a one-dimensional labeled array capable of holding any data type.
- The labels are collectively known as the index.

In [19]:
# a) From a Python list (default integer index)
list_data = [10, 20, 30, 40, 50]
s_from_list = pd.Series(list_data)

print("Series from list (default index):\n", s_from_list)
print("-" * 20)

Series from list (default index):
 0    10
1    20
2    30
3    40
4    50
dtype: int64
--------------------


In [20]:
# b) From a Python list with a custom index
custom_index = ['a', 'b', 'c', 'd', 'e']
s_custom_index = pd.Series(list_data, index=custom_index)

print("Series from list (custom index):\n", s_custom_index)
print("-" * 20)

Series from list (custom index):
 a    10
b    20
c    30
d    40
e    50
dtype: int64
--------------------


In [21]:
# c) From a NumPy array
numpy_arr = np.array([1.1, 2.2, 3.3, 4.4])
index_np = ['w', 'x', 'y', 'z']

s_from_numpy = pd.Series(numpy_arr, index=index_np)

print("Series from NumPy array:\n", s_from_numpy)
print("-" * 20)

Series from NumPy array:
 w    1.1
x    2.2
y    3.3
z    4.4
dtype: float64
--------------------


In [22]:
# d) From a Python dictionary
# Dictionary keys become the index, values become the Series data.
dict_data = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}

s_from_dict = pd.Series(dict_data)

print("Series from dictionary:\n", s_from_dict)
print("-" * 20)

Series from dictionary:
 Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64
--------------------


In [23]:
# If an index is provided with a dict, values are matched based on index labels.
states = ['California', 'Ohio', 'Oregon', 'Texas'
         ]
s_dict_with_index = pd.Series(dict_data, index=states)

print("Series from dict with explicit index (NaN for missing):\n", s_dict_with_index)

# Note: 'California' is NaN (Not a Number) as it wasn't in dict_data.
# 'Utah' from the dict is excluded as it wasn't in the `states` index.
print("-" * 20)

Series from dict with explicit index (NaN for missing):
 California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64
--------------------


In [24]:
# e) From a scalar value (value is repeated for each index label)
s_scalar = pd.Series(5., index=['a', 'b', 'c', 'd'])

print("Series from scalar:\n", s_scalar)
print("-" * 20)

Series from scalar:
 a    5.0
b    5.0
c    5.0
d    5.0
dtype: float64
--------------------


#### 2. Series Attributes

In [46]:
list_1 = [100, 200, 300, 400, 500]
NEW_index = ['A', 'B', 'C', 'D', 'E']
series_from_list = pd.Series(list_1, NEW_index)

print("Series from list with custom index:\n", series_from_list)

Series from list with custom index:
 A    100
B    200
C    300
D    400
E    500
dtype: int64


In [40]:
# .index: Get the index object
print(f"\nIndex: {series_from_list.index}")


Index: Index(['A', 'B', 'C', 'D', 'E'], dtype='object')


In [42]:
# .values: Get the data as a NumPy array

print(f"Values (NumPy array): {series_from_list.values}\n")

print(f"Type of values: {type(series_from_list.values)}")#type

Values (NumPy array): [100 200 300 400 500]

Type of values: <class 'numpy.ndarray'>


In [43]:
# .dtype: Get the data type of the elements
print(f"Data type (dtype): {series_from_list.dtype}")

Data type (dtype): int64


In [48]:
# .name: Get or set the name of the Series
series_from_list.name = 'MyNumbers'

print(f"Series name: {series_from_list.name}")

Series name: MyNumbers


In [50]:
# .index.name: Get or set the name of the index

series_from_list.index.name = 'Letters'

print(f"Index name: {series_from_list.index.name}")

print("\nSeries with names set:\n", series_from_list)

Index name: Letters

Series with names set:
 Letters
A    100
B    200
C    300
D    400
E    500
Name: MyNumbers, dtype: int64


In [51]:
# .size: Get the number of elements

print(f"\nSize (number of elements): {series_from_list.size}")


Size (number of elements): 5


In [52]:
# .shape: Get the shape (as a tuple)

print(f"Shape: {series_from_list.shape}") # Output: (5,) for a 1D Series
print("-" * 20)

Shape: (5,)
--------------------


#### 3. Index Object
- The Index object holds the axis labels. It's immutable (cannot be changed in place)
- and behaves like a fixed-size set but can contain duplicates.

In [54]:
list_1 = [100, 200, 300, 400, 500]
NEW_index = ['A', 'B', 'C', 'D', 'E']
series_from_list = pd.Series(list_1, NEW_index)

print("Series from list with custom index:\n", series_from_list)

Series from list with custom index:
 A    100
B    200
C    300
D    400
E    500
dtype: int64


In [53]:
idx = series_from_list.index

print("--- Index Object ---")
print(f"Index object: {idx}")

print(f"Is index unique? {idx.is_unique}")

--- Index Object ---
Index object: Index(['A', 'B', 'C', 'D', 'E'], dtype='object', name='Letters')
Is index unique? True


In [57]:
# Index objects support set operations (if they don't have duplicates)

idx2 = pd.Index(['c', 'd', 'e', 'f', 'g'])

print(f"New index: {idx2}\n")

print(f"Intersection: {idx.intersection(idx2)}\n")

print(f"Union: {idx.union(idx2)}\n")

print(f"Difference (idx - idx2): {idx.difference(idx2)}\n")
print("-" * 20)

New index: Index(['c', 'd', 'e', 'f', 'g'], dtype='object')

Intersection: Index([], dtype='object')

Union: Index(['A', 'B', 'C', 'D', 'E', 'c', 'd', 'e', 'f', 'g'], dtype='object')

Difference (idx - idx2): Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

--------------------


#### 4. Basic Operations & Accessing Elements

In [58]:
print("Series used (series_from_list):\n", series_from_list)

Series used (series_from_list):
 A    100
B    200
C    300
D    400
E    500
dtype: int64


In [71]:
# Accessing elements like a dictionary (using index labels)

print("Element at index 'C is: ", series_from_list['C'])

print(f"\nElement at index 'C' using fstring: {series_from_list['C']}")

Element at index 'C is:  300

Element at index 'C' using fstring: 300


In [75]:
# Accessing elements like a NumPy array (using integer position) - Use .iloc for clarity

print(f"Element at position 1 (using .iloc): {series_from_list.iloc[1]}")

Element at position 1 (using .iloc): 200


In [73]:
# Slicing using labels (inclusive of the end label)

print(f"\nSlice from 'B' to 'D' (label-based):\n{series_from_list['B':'D']}")


Slice from 'B' to 'D' (label-based):
B    200
C    300
D    400
dtype: int64


In [76]:
# Slicing using positions (exclusive of the end position) - Use .iloc

print(f"Slice from position 1 to 4 (position-based using .iloc):\n{series_from_list.iloc[1:4]}")

print(f"Slice from position 0 to 4:\n{series_from_list.iloc[:4]}")
print(f"Slice from position 1 to end:\n{series_from_list.iloc[1:]}")

Slice from position 1 to 4 (position-based using .iloc):
B    200
C    300
D    400
dtype: int64
Slice from position 0 to 4:
A    100
B    200
C    300
D    400
dtype: int64
Slice from position 1 to end:
B    200
C    300
D    400
E    500
dtype: int64


In [79]:
print(f"\nElements at label ['A', 'E', 'C'] are:\n{series_from_list[['A', 'E', 'C']]}")


Elements at label ['A', 'E', 'C'] are:
A    100
E    500
C    300
dtype: int64


In [81]:
# Getting multiple elements using a list of positions (.iloc)
print(f"\nElements at positions [0, 4, 2] (using .iloc):\n{series_from_list.iloc[[0, 4, 2]]}")


Elements at positions [0, 4, 2] (using .iloc):
A    100
E    500
C    300
dtype: int64


In [85]:
# Boolean indexing (like NumPy arrays)
print(f"\nElements > 25:\n{series_from_list[series_from_list > 25]}")


Elements > 25:
A    100
B    200
C    300
D    400
E    500
dtype: int64


In [92]:
# Vectorized arithmetic operations (like NumPy arrays)
print(f"\nSeries * 2:\n{series_from_list * 2}")
print(f"\nSeries + 100:\n{series_from_list + 100}")
print(f"\nApplying NumPy ufunc (np.exp):\n{np.exp(series_from_list / 10)}") # Example ufunc


Series * 2:
A     200
B     400
C     600
D     800
E    1000
dtype: int64

Series + 100:
A    200
B    300
C    400
D    500
E    600
dtype: int64

Applying NumPy ufunc (np.exp):
A    2.202647e+04
B    4.851652e+08
C    1.068647e+13
D    2.353853e+17
E    5.184706e+21
dtype: float64


In [93]:
# Check for existence of index labels
print(f"\nIs 'B' in index? {'B' in series_from_list}") # Output: True
print(f"Is 'F' in index? {'F' in series_from_list}") # Output: False
print("-" * 20)


Is 'B' in index? True
Is 'F' in index? False
--------------------


In [101]:
# Alignment during operations
# When performing operations between Series, Pandas aligns data based on index labels.
s1 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
s2 = pd.Series([10, 20, 30, 40], index=['b', 'c', 'd', 'e'])

print("s1:\n", s1)
print("s2:\n", s2)

s1:
 a    1
b    2
c    3
dtype: int64
s2:
 b    10
c    20
d    30
e    40
dtype: int64


In [102]:
print("Alignment Example:")
print("s1 + s2 (aligned by index):\n", s1 + s2)
# Note: 'a', 'd', 'e' result in NaN because they don't exist in both Series indices.

Alignment Example:
s1 + s2 (aligned by index):
 a     NaN
b    12.0
c    23.0
d     NaN
e     NaN
dtype: float64
