# Series df[series] 

In [1]:
import pandas as pd

## 1. Pandas Series: Series Structure

In [25]:
import pandas as pd
import seaborn as sns

# Load dataset
df = sns.load_dataset('titanic')
print(df.head())

   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  


### 1. Basic Series Properties

In [26]:
df['age'].index

RangeIndex(start=0, stop=891, step=1)

In [27]:
df['age'].name

'age'

In [28]:
df['age'].dtype

dtype('float64')

In [29]:
df['age'].dtypes

dtype('float64')

In [30]:
df['age'].shape

(891,)

In [31]:
df['age'].ndim

1

In [32]:
df['age'].size

891

In [33]:
df['age'].values[:5]

array([22., 38., 26., 35., 35.])

In [34]:
df['age'].array[:5]

<NumpyExtensionArray>
[np.float64(22.0), np.float64(38.0), np.float64(26.0), np.float64(35.0),
 np.float64(35.0)]
Length: 5, dtype: float64

### 2. Boolean Checks

In [35]:
df['age'].empty

False

In [36]:
df['age'].hasnans

True

In [37]:
df['age'].isna().sum()

np.int64(177)

### 3. Advanced Attributes (Less Common)

In [38]:
df['age'].flags

<Flags(allows_duplicate_labels=True)>

In [39]:
df['age'].set_flags(allows_duplicate_labels=False)

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: age, Length: 891, dtype: float64

In [40]:
df['age'].attrs

{}

## 2. Pandas Series: Access & Selection

### 1. Label-Based & Positional Access

In [41]:

df['age'].at[0]


np.float64(22.0)

In [42]:
df['age'].iat[10]

np.float64(4.0)

In [43]:
df['age'].iloc[10:]

10      4.0
11     58.0
12     20.0
13     39.0
14     14.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: age, Length: 881, dtype: float64

In [44]:
df.loc[0, 'age']

np.float64(22.0)

In [45]:
print(df.loc[[0,2,15,6,10,11], 'age'])

0     22.0
2     26.0
15    55.0
6     54.0
10     4.0
11    58.0
Name: age, dtype: float64


In [46]:
df.loc[0, ['age', 'fare', 'sex']]

age     22.0
fare    7.25
sex     male
Name: 0, dtype: object

In [47]:
df.loc[[1, 3, 5], ['sex', 'age', 'fare']]

Unnamed: 0,sex,age,fare
1,female,38.0,71.2833
3,female,35.0,53.1
5,male,,8.4583


In [48]:
df.loc[0:4, 'age':'fare']

Unnamed: 0,age,sibsp,parch,fare
0,22.0,1,0,7.25
1,38.0,1,0,71.2833
2,26.0,0,0,7.925
3,35.0,1,0,53.1
4,35.0,0,0,8.05


### 2. Value Retrieval & Conversion

In [49]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [50]:
df['age'].get(0)  

np.float64(22.0)

In [51]:
df['fare'].get(2)

np.float64(7.925)

In [52]:
df['age'].iloc[[0]].item()

22.0

In [53]:
df['fare'].array[:10]

<NumpyExtensionArray>
[   np.float64(7.25), np.float64(71.2833),   np.float64(7.925),
    np.float64(53.1),    np.float64(8.05),  np.float64(8.4583),
 np.float64(51.8625),  np.float64(21.075), np.float64(11.1333),
 np.float64(30.0708)]
Length: 10, dtype: float64

In [54]:
df['age'].items

<bound method Series.items of 0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: age, Length: 891, dtype: float64>

In [55]:
for i, val in df['age'].items():
    print(i, val)
    if i >= 4:
        break

0 22.0
1 38.0
2 26.0
3 35.0
4 35.0


In [56]:
df['age'].iter()

AttributeError: 'Series' object has no attribute 'iter'

### 3. Data Removal & Iteration

### 4. Advanced Selection

In [58]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [60]:
df['age']

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: age, Length: 891, dtype: float64

In [62]:
df['age'].add(100)

0      122.0
1      138.0
2      126.0
3      135.0
4      135.0
       ...  
886    127.0
887    119.0
888      NaN
889    126.0
890    132.0
Name: age, Length: 891, dtype: float64

In [63]:
df['age'].div(5)

0      4.4
1      7.6
2      5.2
3      7.0
4      7.0
      ... 
886    5.4
887    3.8
888    NaN
889    5.2
890    6.4
Name: age, Length: 891, dtype: float64

In [64]:
df['age'].truediv(5)

0      4.4
1      7.6
2      5.2
3      7.0
4      7.0
      ... 
886    5.4
887    3.8
888    NaN
889    5.2
890    6.4
Name: age, Length: 891, dtype: float64

In [65]:
df['age'].radd(100)

0      122.0
1      138.0
2      126.0
3      135.0
4      135.0
       ...  
886    127.0
887    119.0
888      NaN
889    126.0
890    132.0
Name: age, Length: 891, dtype: float64

In [66]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [68]:
print(df['age'].sub(10))
print(df['age'].rsub(10))

0      12.0
1      28.0
2      16.0
3      25.0
4      25.0
       ... 
886    17.0
887     9.0
888     NaN
889    16.0
890    22.0
Name: age, Length: 891, dtype: float64
0     -12.0
1     -28.0
2     -16.0
3     -25.0
4     -25.0
       ... 
886   -17.0
887    -9.0
888     NaN
889   -16.0
890   -22.0
Name: age, Length: 891, dtype: float64


In [76]:
a = df[df['age'].eq(12)]
a

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
125,1,3,male,12.0,1,0,11.2417,C,Third,child,False,,Cherbourg,yes,False


## 3. Pandas Series:Math Operations

## 4. Pandas Series: Statistical Summaries

## 5. Pandas Series: Cumulative, Ranking & Rolling

## 6. Pandas Series: Advanced Statistics

## 7. Pandas Series: Missing Data Handling

## 8. Pandas Series: Conditional Logic & Boolean Masking

## 9. Pandas Series: Type Conversion & Copying

## 10. Pandas Series: Function Mapping & Transformation

## 11. Pandas Series: Time Series Handling

## 12. Pandas Series: String Operations

## 13. Pandas Series: Categoricals 

## 14. Pandas Series: Sparse Data Handling

In [2]:
import pandas as pd
import numpy as np

# Create a dense series with mostly zeros
dense_series = pd.Series([0, 0, 0, 5, 0, 0, 8, 0, 0, 0])

# Convert to sparse format
sparse_series = dense_series.astype('Sparse[int]')

In [3]:
dense_series 

0    0
1    0
2    0
3    5
4    0
5    0
6    8
7    0
8    0
9    0
dtype: int64

In [4]:
sparse_series

0    0
1    0
2    0
3    5
4    0
5    0
6    8
7    0
8    0
9    0
dtype: Sparse[int64, 0]

In [5]:
type(sparse_series)

pandas.core.series.Series

In [6]:
# Series with mostly -1 values
data = pd.Series([-1, -1, 42, -1, -1, 99])

# Create a SparseDtype with custom fill_value
from pandas import SparseDtype
sparse_dtype = SparseDtype(int, fill_value=-1)

# Specify fill_value using the SparseDtype
sparse_with_fill = data.astype(sparse_dtype)

In [7]:
sparse_with_fill

0    -1
1    -1
2    42
3    -1
4    -1
5    99
dtype: Sparse[int64, -1]

In [8]:
# Number of non-fill values
print(sparse_series.sparse.npoints)  # Output: 2

# Density ratio (non-fill/total)
print(sparse_series.sparse.density)  # Output: 0.2

# The fill value being used
print(sparse_series.sparse.fill_value)  # Output: 0

# Access just the non-fill values
print(sparse_series.sparse.sp_values)  # Output: [5 8]

2
0.2
0
[5 8]


In [9]:
# Create large sparse and dense series
large_dense = pd.Series([0]*1_000_000 + [1, 2, 3])
large_sparse = large_dense.astype('Sparse[int]')

print(f"Dense memory: {large_dense.memory_usage(deep=True)/1e6:.1f} MB")
print(f"Sparse memory: {large_sparse.memory_usage(deep=True)/1e6:.1f} MB")

Dense memory: 8.0 MB
Sparse memory: 0.0 MB


In [10]:
# Sparse-aware operations maintain sparsity
doubled = sparse_series * 2  # Still sparse
print(doubled)
print()

# Operations may auto-convert to dense when needed
sqrt_values = np.sqrt(sparse_series)  # Becomes dense
print(sqrt_values)



0     0
1     0
2     0
3    10
4     0
5     0
6    16
7     0
8     0
9     0
dtype: Sparse[int64, 0]

0    0.000000
1    0.000000
2    0.000000
3    2.236068
4    0.000000
5    0.000000
6    2.828427
7    0.000000
8    0.000000
9    0.000000
dtype: Sparse[float64, 0.0]


In [13]:
from scipy.sparse import csr_matrix
import numpy as np

# Method 1: Convert sparse values and indices directly to CSR format
indices = np.array([i for i, val in enumerate(sparse_series) if val != sparse_series.sparse.fill_value])
values = sparse_series.sparse.sp_values
sparse_matrix = csr_matrix((values, (indices, np.zeros_like(indices))), 
						   shape=(len(sparse_series), 1))

# Method 2: Alternative approach using scipy directly
sparse_array = csr_matrix(sparse_series.to_numpy()[:, np.newaxis])

print("Method 1 result:")
print(sparse_matrix)
print("\nMethod 2 result:")
print(sparse_array)

Method 1 result:
  (3, 0)	5
  (6, 0)	8

Method 2 result:
  (3, 0)	5
  (6, 0)	8


## 15. Pandas Series: Export & Conversion

## 16. Pandas Series: Advanced & Utility Methods

In [17]:
import pandas as pd
import numpy as np

# Create sample data with different indices
series1 = pd.Series([10, 20, 30, 40], index=['A', 'B', 'C', 'D'], name='Sales_Q1')
series2 = pd.Series([15, 25, 35], index=['B', 'C', 'E'], name='Sales_Q2')

print("Original Series:")
print("Series 1:\n", series1)
print("\nSeries 2:\n", series2)

Original Series:
Series 1:
 A    10
B    20
C    30
D    40
Name: Sales_Q1, dtype: int64

Series 2:
 B    15
C    25
E    35
Name: Sales_Q2, dtype: int64


In [15]:
# Example 1: Outer join (default) - includes all indices
aligned_s1, aligned_s2 = series1.align(series2, join='outer')
print("\n1. Outer Join Alignment:")
print("Aligned Series 1:\n", aligned_s1)
print("Aligned Series 2:\n", aligned_s2)


1. Outer Join Alignment:
Aligned Series 1:
 A    10.0
B    20.0
C    30.0
D    40.0
E     NaN
Name: Sales_Q1, dtype: float64
Aligned Series 2:
 A     NaN
B    15.0
C    25.0
D     NaN
E    35.0
Name: Sales_Q2, dtype: float64
