In [1]:
import numpy as np
import pandas as pd

## 1. Different ways of creating a Pandas Series
1. From a list
2. From a NumPy array
3. From a dictionary


In [2]:
# pandas (pd) and numpy (np) are imported once in the first cell, so they are
# not re-imported here.

# From a list: pandas assigns a default integer index starting at 0.
data = [10, 20, 30, 40, 50]
s1 = pd.Series(data)
print(s1)

# From a NumPy array: the Series keeps the dtype of the array it was built from,
# so the printed dtype may differ from the list-based Series above.
data_array = np.array([10, 20, 30, 40, 50])
s2 = pd.Series(data_array)
print(s2)


0    10
1    20
2    30
3    40
4    50
dtype: int64
0    10
1    20
2    30
3    40
4    50
dtype: int32


In [3]:
# From a dict: the keys become the index labels and the values become the data.
keys = ['A', 'B', 'C', 'D', 'E']
vals = [10, 20, 30, 40, 50]
data_dict = dict(zip(keys, vals))
s_dict = pd.Series(data_dict)
print(s_dict)

A    10
B    20
C    30
D    40
E    50
dtype: int64


## 2. Operations on Pandas Series
1. Indexing and slicing
2. Descriptive statistics (mean and max)
3. Boolean filtering
4. Sorting
5. Unique values and value counts



In [4]:
# pandas (pd) is imported once in the first cell, so it is not re-imported here.

# Creating a Pandas Series with string labels as its index
data = {'A': 10, 'B': 20, 'C': 30, 'D': 40, 'E': 50}
s = pd.Series(data)

# Indexing and Slicing
print("Element at index 'B':", s['B'])
print("Elements from index 'B' to 'D':")
# Slicing by label includes BOTH endpoints ('B' and 'D'), unlike positional slices.
print(s['B':'D'])

# Descriptive Statistics
print("\nMean:", s.mean())
print("Max:", s.max())

# Boolean Operations: `s > 25` builds a True/False mask that selects matching elements
print("\nElements greater than 25:")
print(s[s > 25])

# Sorting: both calls return a new Series and leave `s` unchanged
print("\nSorted by index:")
print(s.sort_index())
print("\nSorted by values:")
print(s.sort_values(ascending=False))  # largest value first

# Unique Values and Value Counts
print("\nUnique values:")
print(s.unique())
print("\nValue counts:")
print(s.value_counts())



Element at index 'B': 20
Elements from index 'B' to 'D':
B    20
C    30
D    40
dtype: int64

Mean: 30.0
Max: 50

Elements greater than 25:
C    30
D    40
E    50
dtype: int64

Sorted by index:
A    10
B    20
C    30
D    40
E    50
dtype: int64

Sorted by values:
E    50
D    40
C    30
B    20
A    10
dtype: int64

Unique values:
[10 20 30 40 50]

Value counts:
10    1
20    1
30    1
40    1
50    1
dtype: int64


## DataFrame
DataFrames can be created from:
1. A list
2. A dictionary
3. A CSV file

In [5]:
# 1. DataFrame created from a list

# Column labels are supplied separately from the row data.
column_names = ['ID', 'Name', 'Age']

# A list of lists: each inner list is one row of the table.
data = [
    [1, 'John', 25],
    [2, 'Alice', 30],
    [3, 'Bob', 35],
]

# Build the DataFrame from the rows and label its columns
df = pd.DataFrame(data=data, columns=column_names)

print(df)


   ID   Name  Age
0   1   John   25
1   2  Alice   30
2   3    Bob   35


In [6]:
# 2. DataFrame created from a dictionary
# Each key becomes a column label and each list supplies that column's values.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago'],
}

df_dict = pd.DataFrame(data=data)
print(df_dict)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [9]:
# 3. DataFrame created from a CSV file
# The path is relative, so the file is looked up from the current working directory.
csv_path = 'Telco-Churn.csv'
df = pd.read_csv(csv_path)
# Bare last expression: the notebook displays the DataFrame as the cell's output.
df

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


### Working with DataFrame operations

In [None]:
# head() returns the first 5 rows by default
first_rows = df.head()
print(first_rows)
# tail() returns the last 5 rows by default
last_rows = df.tail()
print(last_rows)
# describe() produces a description of the dataset
dataset_description = df.describe()
print(dataset_description)

In [None]:
# shape is an attribute (accessed without parentheses) giving the number of
# rows and columns in the dataset
print("\n The shape of the dataset:")
# Left as the bare last expression so the notebook shows it as the cell's output
df.shape


In [None]:
# columns is an attribute (accessed without parentheses) holding the column
# labels of the dataset
print("\nThe column headings of the dataset:")
# Left as the bare last expression so the notebook shows it as the cell's output
df.columns

In [None]:
# values is an attribute (accessed without parentheses) exposing the dataset's
# contents without the row and column labels
print("\n The values of the dataset:")
# Left as the bare last expression so the notebook shows it as the cell's output
df.values

In [None]:
# Dropping a specific column.
# The result of drop() is only printed, not assigned, so df keeps its 'gender' column.
# Column labels are case-sensitive, so the heading names the exact label 'gender'.
print("\nDrop 'gender' column:")
print(df.drop(columns=['gender']))


In [None]:
# Removing duplicate rows; the result is a separate object and df is not reassigned
print("\nDrop duplicates:")
deduplicated = df.drop_duplicates()
print(deduplicated)

In [None]:
# Filling missing values
# fillna(0) is called with 0 as the fill value; df is not reassigned
print("\nFill missing values with 0:")
filled = df.fillna(0)
print(filled)

In [None]:
# Checking for missing values and non-missing values
print("\nCheck for missing values:")
missing_mask = df.isnull()    # flags the missing entries
print(missing_mask)
print("\nCheck for non-missing values:")
present_mask = df.notnull()   # flags the non-missing entries
print(present_mask)

In [None]:
# Summary statistics of a single column:
# mean() / median() / sum() / min() / max()
print("\nSummary statistics of tenure:")
# Look the column up once and reuse it for every statistic
tenure = df['tenure']
print("Mean :", tenure.mean())
print("Median :", tenure.median())
print("Total :", tenure.sum())
print("Minimum :", tenure.min())
print("Maximum :", tenure.max())


## DataFrame indexing operations

In [None]:
# Selecting columns: two ways of accessing a single column
print("Selecting Columns:")
customer_ids = df['customerID']   # square brackets with the column label
print(customer_ids)
tenure_column = df.tenure         # dot notation with the column name as an attribute
print(tenure_column)

In [None]:
# Selecting rows: by label, by integer position, and by condition
print("\nSelecting Rows:")
print("Using .loc[]:")
row_by_label = df.loc[1]        # label-based indexing
print(row_by_label)
print("Using .iloc[]:")
row_by_position = df.iloc[2]    # integer-based indexing
print(row_by_position)
print("Using boolean indexing:")
tenure_over_30 = df['tenure'] > 30   # boolean mask used to filter the rows
print(df[tenure_over_30])

In [None]:
# Selecting rows and columns together
print("\nSelecting Rows and Columns:")
print(df.loc[1, 'customerID'])   # label-based: row label 1, column 'customerID'
print(df.iloc[2, 1])       # integer-based: row position 2, column position 1
# Filter the rows and pick the column in one .loc call rather than chaining
# df[mask]['customerID']; the selected values are the same, but the single call
# avoids building an intermediate DataFrame and is the form that also works
# safely for assignment.
print(df.loc[df['tenure'] > 30, 'customerID'])

In [None]:
# Conditional selection: keep only the rows that satisfy a condition
print("\nConditional Selection:")
is_male = df['gender'] == 'Male'   # boolean mask comparing each 'gender' entry to 'Male'
print(df[is_male])

In [None]:
# Setting the index
print("\nSetting Index:")
# The result is stored under a new name, so df itself is not reassigned
df_indexed = df.set_index(keys='customerID')
print(df_indexed)

In [None]:
# Sorting by a single column; the result is stored under a new name
sort_column = 'tenure'
sorted_df = df.sort_values(by=sort_column)
print("\nSorted DataFrame:")
print(sorted_df)


