In [1]:
import pandas as pd
import numpy as np

# Series

In [2]:
# Sample city names used by the Series examples.
# The last entry is spelled "Hydrabad" on purpose here; a later cell corrects it on the Series.
cities = [
    "Delhi",
    "Mumbai",
    "Bengaluru",
    "Pune",
    "Chennai",
    "Kolkata",
    "Goa",
    "Indore",
    "Lucknow",
    "Hydrabad",
]
cities

['Delhi',
 'Mumbai',
 'Bengaluru',
 'Pune',
 'Chennai',
 'Kolkata',
 'Goa',
 'Indore',
 'Lucknow',
 'Hydrabad']

In [4]:
# Convert the Python list to a pandas Series; with no index given, the
# elements are labelled 0..9.
city_names = pd.Series(cities)
city_names

0        Delhi
1       Mumbai
2    Bengaluru
3         Pune
4      Chennai
5      Kolkata
6          Goa
7       Indore
8      Lucknow
9     Hydrabad
dtype: object

In [6]:
# Correct the misspelled last entry; the element is addressed by its index label 9.
city_names.loc[9] = "Hyderabad"
city_names

0        Delhi
1       Mumbai
2    Bengaluru
3         Pune
4      Chennai
5      Kolkata
6          Goa
7       Indore
8      Lucknow
9    Hyderabad
dtype: object

In [7]:
# Positional slice: elements at positions 2 through 7 (the stop value 8 is excluded).
city_names.iloc[2:8]

2    Bengaluru
3         Pune
4      Chennai
5      Kolkata
6          Goa
7       Indore
dtype: object

In [9]:
# Same city list, this time with string labels as the row index instead of 0..9.
city_ser = pd.Series(
    data=cities,
    index=['DEL', 'MUM', 'BNG', 'PUN', 'CHN', 'KOL', 'GOA', 'IND', 'LCK', 'HYD'],
)
city_ser

DEL        Delhi
MUM       Mumbai
BNG    Bengaluru
PUN         Pune
CHN      Chennai
KOL      Kolkata
GOA          Goa
IND       Indore
LCK      Lucknow
HYD     Hydrabad
dtype: object

In [10]:
# Look up a single element by its string index label.
city_ser.loc['MUM']

'Mumbai'

In [11]:
# Source dictionary for the next example: currency code -> currency name.
# A pandas Series is built from it in the following cell.
currency = dict(
    USD="US Dollar",
    INR="Indian Rupee",
    SGD="Singapore Dollar",
    THB="Thai Baht",
    CAD="Canadian Dollar",
)
currency

{'USD': 'US Dollar',
 'INR': 'Indian Rupee',
 'SGD': 'Singapore Dollar',
 'THB': 'Thai Baht',
 'CAD': 'Canadian Dollar'}

In [13]:
# Build a Series from the dict: keys become the index labels, values become the data.
currency_ser = pd.Series(currency)
currency_ser

USD           US Dollar
INR        Indian Rupee
SGD    Singapore Dollar
THB           Thai Baht
CAD     Canadian Dollar
dtype: object

In [14]:
# Assigning to a label that is not yet in the index appends a new element to the Series.
# NOTE(review): the currency name is normally spelled 'Ruble'; the literal is kept
# unchanged here so the code still matches the recorded outputs.
currency_ser['RUB'] = 'Russian Rubble'
currency_ser

USD           US Dollar
INR        Indian Rupee
SGD    Singapore Dollar
THB           Thai Baht
CAD     Canadian Dollar
RUB      Russian Rubble
dtype: object

In [15]:
# Get the index labels of the Series (its row index)
currency_ser.index

Index(['USD', 'INR', 'SGD', 'THB', 'CAD', 'RUB'], dtype='object')

In [16]:
# Get the values of the Series, returned as a NumPy array
currency_ser.values

array(['US Dollar', 'Indian Rupee', 'Singapore Dollar', 'Thai Baht',
       'Canadian Dollar', 'Russian Rubble'], dtype=object)

In [17]:
# List the names of all attributes and methods available on the Series.
# String index labels (e.g. 'USD') also show up in this listing.
dir(currency_ser)

['CAD',
 'INR',
 'RUB',
 'SGD',
 'T',
 'THB',
 'USD',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '

# DataFrames

In [4]:
# A flat list becomes a one-column DataFrame; the column gets the default label 0.
df = pd.DataFrame(cities)
df

Unnamed: 0,0
0,Delhi
1,Mumbai
2,Bengaluru
3,Pune
4,Chennai
5,Kolkata
6,Goa
7,Indore
8,Lucknow
9,Hydrabad


In [6]:
# Column-oriented input for a DataFrame: each key is a column name and each
# list holds that column's values, one entry per employee.
data_dict = dict(
    empID=['EMP384', 'EMP343', 'EMP198', 'EMP486', 'EMP298'],
    empName=['A', 'B', 'C', 'D', 'E'],
    empSalary=[1000, 3000, 4000, 500, 1982],
)

print(data_dict)

{'empID': ['EMP384', 'EMP343', 'EMP198', 'EMP486', 'EMP298'], 'empName': ['A', 'B', 'C', 'D', 'E'], 'empSalary': [1000, 3000, 4000, 500, 1982]}


In [7]:
# Each dict key becomes a column; each list supplies that column's values.
df = pd.DataFrame(data_dict)
df

Unnamed: 0,empID,empName,empSalary
0,EMP384,A,1000
1,EMP343,B,3000
2,EMP198,C,4000
3,EMP486,D,500
4,EMP298,E,1982


In [10]:
# Use the empID column as the row index of the DataFrame.
# The frame is rebuilt from data_dict rather than changed with
# set_index(..., inplace=True): an in-place set_index removes the empID column,
# so running the cell a second time would raise KeyError. This form gives the
# same result on every run.
df = pd.DataFrame(data_dict).set_index('empID')
df

Unnamed: 0_level_0,empName,empSalary
empID,Unnamed: 1_level_1,Unnamed: 2_level_1
EMP384,A,1000
EMP343,B,3000
EMP198,C,4000
EMP486,D,500
EMP298,E,1982


In [13]:
# Label-based row lookup; the matching row comes back as a Series.
df.loc['EMP486']

empName        D
empSalary    500
Name: EMP486, dtype: object

In [14]:
# Input for building a DataFrame from tuples: each tuple is one row.
data = [
    ("English", "French", "Spanish", "German"),  # subject names
    (101, 201, 301, 401),                        # course codes
    (67.54, 78.9, 59.76, 92.66),                 # scores
]

data

[('English', 'French', 'Spanish', 'German'),
 (101, 201, 301, 401),
 (67.54, 78.9, 59.76, 92.66)]

In [15]:
# Each tuple becomes one row; index= supplies the row labels, while the columns
# keep their default labels 0..3.
df = pd.DataFrame(data,index = ['Subject','Course_Code','Score'])
df

Unnamed: 0,0,1,2,3
Subject,English,French,Spanish,German
Course_Code,101,201,301,401
Score,67.54,78.9,59.76,92.66


In [17]:
# One tuple per subject: (name, course code, score).
data = [
    ("English", 101, 67.54),
    ("French", 201, 78.9),
    ("Spanish", 301, 59.76),
    ("German", 401, 92.66),
]

data

[('English', 101, 67.54),
 ('French', 201, 78.9),
 ('Spanish', 301, 59.76),
 ('German', 401, 92.66)]

In [18]:
# Each tuple becomes one row; columns= supplies the column names.
df = pd.DataFrame(data,columns = ['Subject','Course_Code','Score'])
df

Unnamed: 0,Subject,Course_Code,Score
0,English,101,67.54
1,French,201,78.9
2,Spanish,301,59.76
3,German,401,92.66


# Read the Data from external sources into pandas DataFrame

- CSV File = pd.read_csv()
- Excel = pd.read_excel()
- SQL = pd.read_sql()
- Text (delimited) = pd.read_table()
- HTML = pd.read_html()

In [None]:
# read_sql() needs a database connection object that has been created beforehand
# with whichever library matches your database.
# NOTE: COX is only a placeholder for that connection; it is not defined in the
# cells shown here, so running this cell as-is raises NameError.
pd.read_sql('select * from some_table',con=COX)

In [20]:
# Load the CSV into a DataFrame. The path is relative, so AAPL.csv must be in the
# notebook's working directory.
apple_stock_price = pd.read_csv('AAPL.csv')
apple_stock_price

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-09-29,100.589996,100.690002,98.040001,99.620003,93.514290,142718700
1,2014-10-06,99.949997,102.379997,98.309998,100.730003,94.556244,280258200
2,2014-10-13,101.330002,101.779999,95.180000,97.669998,91.683792,358539800
3,2014-10-20,98.320000,105.489998,98.220001,105.220001,98.771042,358532900
4,2014-10-27,104.849998,108.040001,104.699997,108.000000,101.380676,220230600
...,...,...,...,...,...,...,...
179,2018-03-05,175.210007,180.000000,174.270004,179.979996,179.979996,139852700
180,2018-03-12,180.289993,183.500000,177.619995,178.020004,178.020004,155417500
181,2018-03-19,177.320007,177.470001,164.940002,164.940002,164.940002,172670700
182,2018-03-26,168.070007,175.149994,165.190002,167.779999,167.779999,157898400


In [26]:
# Converting a DataFrame with Python built-ins:
#   dict(df)             -> maps each column name to that column's Series
#   list(df) / tuple(df) -> the column labels
# Only the value of the last expression in a cell is displayed.
# NOTE(review): the recorded output below is the dict(df) result, so it appears
# to predate the last two lines of this cell.
dict(df)
list(df)
tuple(df)

{'Subject': 0    English
 1     French
 2    Spanish
 3     German
 Name: Subject, dtype: object,
 'Course_Code': 0    101
 1    201
 2    301
 3    401
 Name: Course_Code, dtype: int64,
 'Score': 0    67.54
 1    78.90
 2    59.76
 3    92.66
 Name: Score, dtype: float64}

# Most used operations on DataFrames

In [29]:
# Getting the shape of the DataFrame
apple_stock_price.shape  # (n_rows, n_cols) tuple - first element is the number of rows, second is the number of columns

(184, 7)

In [30]:
# Row index of the DataFrame (a default RangeIndex at this point)
apple_stock_price.index

RangeIndex(start=0, stop=184, step=1)

In [31]:
# Column labels of the DataFrame
apple_stock_price.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [33]:
# Largest value in the Volume column, using the Series method
apple_stock_price['Volume'].max()

500363000

In [34]:
# Same maximum, this time by passing the column to NumPy's max function
np.max(apple_stock_price['Volume'])

500363000

In [35]:
# Smallest value in the Volume column, via NumPy
np.min(apple_stock_price['Volume'])

38398505

In [37]:
# Print the minimum, mean and median of the Volume column, one value per line.
volume = apple_stock_price['Volume']
print(volume.min(), volume.mean(), volume.median(), sep="\n")

38398505
191016776.11413044
174411150.0


In [39]:
# dtype of a single column: Volume is stored as integers
apple_stock_price['Volume'].dtype

dtype('int64')

In [41]:
# Open is stored as floating-point numbers
apple_stock_price['Open'].dtype

dtype('float64')

In [42]:
# Statistical summary of the numeric columns: count, mean, std, min, quartiles, max
apple_stock_price.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,184.0,184.0,184.0,184.0,184.0,184.0
mean,127.040245,129.918098,124.340381,127.352663,123.838032,191016800.0
std,24.314449,24.577906,24.176736,24.356534,25.655569,81602810.0
min,92.389999,93.769997,89.470001,90.519997,87.796822,38398500.0
25%,108.882499,112.362499,106.152502,109.204998,105.301765,127370600.0
50%,118.895,120.865002,116.870003,119.190002,114.99973,174411200.0
75%,144.935005,147.695,143.227493,144.704994,143.039467,220775500.0
max,180.289993,183.5,177.619995,179.979996,179.979996,500363000.0


In [43]:
# Structural summary: index range, column names, non-null counts, dtypes, memory usage
apple_stock_price.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 184 entries, 0 to 183
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       184 non-null    object 
 1   Open       184 non-null    float64
 2   High       184 non-null    float64
 3   Low        184 non-null    float64
 4   Close      184 non-null    float64
 5   Adj Close  184 non-null    float64
 6   Volume     184 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 10.2+ KB


In [46]:
apple_stock_price.head(3)  # first 3 rows; called with no argument, head() returns 5 rows

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-09-29,100.589996,100.690002,98.040001,99.620003,93.51429,142718700
1,2014-10-06,99.949997,102.379997,98.309998,100.730003,94.556244,280258200
2,2014-10-13,101.330002,101.779999,95.18,97.669998,91.683792,358539800


In [49]:
apple_stock_price.tail(3)  # last 3 rows; called with no argument, tail() returns 5 rows

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
181,2018-03-19,177.320007,177.470001,164.940002,164.940002,164.940002,172670700
182,2018-03-26,168.070007,175.149994,165.190002,167.779999,167.779999,157898400
183,2018-03-29,167.804993,171.75,166.899994,167.779999,167.779999,38398505


- When you create an object from a class, two things are associated with it: methods and attributes
- Methods are functions that you call on the object, with parentheses - obj.func()
- Attributes are values stored on the object, accessed without parentheses - obj.shape

# Accessing Values from a DataFrame

In [50]:
apple_stock_price['Date']  # accessing a single column: one label returns a Series

0      2014-09-29
1      2014-10-06
2      2014-10-13
3      2014-10-20
4      2014-10-27
          ...    
179    2018-03-05
180    2018-03-12
181    2018-03-19
182    2018-03-26
183    2018-03-29
Name: Date, Length: 184, dtype: object

In [51]:
apple_stock_price[['Date','Open']]  # accessing multiple columns: a list of labels returns a DataFrame

Unnamed: 0,Date,Open
0,2014-09-29,100.589996
1,2014-10-06,99.949997
2,2014-10-13,101.330002
3,2014-10-20,98.320000
4,2014-10-27,104.849998
...,...,...
179,2018-03-05,175.210007
180,2018-03-12,180.289993
181,2018-03-19,177.320007
182,2018-03-26,168.070007


In [52]:
apple_stock_price.at[1,'Close']  # single value by row label and column label: Close for row index 1

100.730003

In [53]:
# All columns of the row labelled 0, returned as a Series
apple_stock_price.loc[0]

Date         2014-09-29
Open         100.589996
High         100.690002
Low           98.040001
Close         99.620003
Adj Close      93.51429
Volume        142718700
Name: 0, dtype: object

In [None]:
# Promote the Date column to the row index.
# The guard keeps the cell safe to re-run: once Date is the index it is no longer
# a column, and calling set_index('Date') again would raise KeyError.
# The result is rebound to the same name instead of using inplace=True.
if 'Date' in apple_stock_price.columns:
    apple_stock_price = apple_stock_price.set_index('Date')

In [56]:
# Date is now the row index instead of a regular column
apple_stock_price

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-09-29,100.589996,100.690002,98.040001,99.620003,93.514290,142718700
2014-10-06,99.949997,102.379997,98.309998,100.730003,94.556244,280258200
2014-10-13,101.330002,101.779999,95.180000,97.669998,91.683792,358539800
2014-10-20,98.320000,105.489998,98.220001,105.220001,98.771042,358532900
2014-10-27,104.849998,108.040001,104.699997,108.000000,101.380676,220230600
...,...,...,...,...,...,...
2018-03-05,175.210007,180.000000,174.270004,179.979996,179.979996,139852700
2018-03-12,180.289993,183.500000,177.619995,178.020004,178.020004,155417500
2018-03-19,177.320007,177.470001,164.940002,164.940002,164.940002,172670700
2018-03-26,168.070007,175.149994,165.190002,167.779999,167.779999,157898400


In [60]:
apple_stock_price.loc['2018-03-26']  # access a row by its named index label (a date string here)

Open         1.680700e+02
High         1.751500e+02
Low          1.651900e+02
Close        1.677800e+02
Adj Close    1.677800e+02
Volume       1.578984e+08
Name: 2018-03-26, dtype: float64

In [61]:
# Label-based slice over the Date index. The start label is included, and the
# stop label '2018-03-30' does not have to be present: the rows up to
# 2018-03-29 are returned.
apple_stock_price.loc['2018-03-26':'2018-03-30']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-03-26,168.070007,175.149994,165.190002,167.779999,167.779999,157898400
2018-03-29,167.804993,171.75,166.899994,167.779999,167.779999,38398505


In [63]:
# First five rows (the head() default)
apple_stock_price.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-09-29,100.589996,100.690002,98.040001,99.620003,93.51429,142718700
2014-10-06,99.949997,102.379997,98.309998,100.730003,94.556244,280258200
2014-10-13,101.330002,101.779999,95.18,97.669998,91.683792,358539800
2014-10-20,98.32,105.489998,98.220001,105.220001,98.771042,358532900
2014-10-27,104.849998,108.040001,104.699997,108.0,101.380676,220230600


In [64]:
# Integer-position access to rows and columns: row position 1, column position 4
# (Adj Close, now that Date is the index rather than a column).
apple_stock_price.iloc[1,4]

94.556244

In [72]:
# Slicing with iloc is positional and excludes the stop value:
# row positions 1-9 and column positions 3-4.
# NOTE(review): the recorded output below lists five columns, which does not match
# the 3:5 column slice; it looks like it came from an earlier version of this cell.
apple_stock_price.iloc[1:10,3:5]

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-10-06,99.949997,102.379997,98.309998,100.730003,94.556244
2014-10-13,101.330002,101.779999,95.18,97.669998,91.683792
2014-10-20,98.32,105.489998,98.220001,105.220001,98.771042
2014-10-27,104.849998,108.040001,104.699997,108.0,101.380676
2014-11-03,108.220001,110.300003,107.720001,109.010002,102.328766
2014-11-10,109.019997,114.190002,108.400002,114.18,107.646675
2014-11-17,114.269997,117.57,113.300003,116.470001,109.805626
2014-11-24,116.849998,119.75,116.620003,118.93,112.124863
2014-12-01,118.809998,119.25,111.269997,115.0,108.419746


# Add or Remove a col from the DataFrame

In [74]:
# Starting point for this section: the subjects frame with Subject, Course_Code and Score
df

Unnamed: 0,Subject,Course_Code,Score
0,English,101,67.54
1,French,201,78.9
2,Spanish,301,59.76
3,German,401,92.66


In [77]:
# Add a new column by assignment. A Series is matched to the rows by index label
# (0..3 on both sides here), not by position.
df['Grade'] = pd.Series(['B','B+','C','A'])
df

Unnamed: 0,Subject,Course_Code,Score,Grade
0,English,101,67.54,B
1,French,201,78.9,B+
2,Spanish,301,59.76,C
3,German,401,92.66,A


In [78]:
df.drop('Score',axis=1)  # axis=1 drops a column; this returns a new frame and leaves df itself unchanged

Unnamed: 0,Subject,Course_Code,Grade
0,English,101,B
1,French,201,B+
2,Spanish,301,C
3,German,401,A


In [80]:
# Remove the row labelled 0 (index=0 is the keyword form of drop(0, axis=0)).
# drop() returns a new frame, so the result is rebound to df instead of using
# inplace=True. errors='ignore' makes a re-run a no-op once the row is already
# gone, where the in-place version would raise KeyError.
df = df.drop(index=0, errors='ignore')
df

Unnamed: 0,Subject,Course_Code,Score,Grade
1,French,201,78.9,B+
2,Spanish,301,59.76,C
3,German,401,92.66,A


In [None]:
# drop() returns a new frame; assigning it back to df is what makes the removal permanent.
df = df.drop(columns='Score')

# Concatenation and Merging of DataFrames

In [81]:
# Work on a copy so that later changes to df do not affect subject
subject = df.copy()
subject

Unnamed: 0,Subject,Course_Code,Score,Grade
1,French,201,78.9,B+
2,Spanish,301,59.76,C
3,German,401,92.66,A


In [82]:
# One row per student; Course_Code is the column shared with the subject frame.
data = dict(
    Student_Name=["AA", "BB", "CC"],
    Course_Code=[201, 401, 301],
)

students = pd.DataFrame(data)
students

Unnamed: 0,Student_Name,Course_Code
0,AA,201
1,BB,401
2,CC,301


In [86]:
# Plain-text view of both frames, separated by one blank line.
print(subject, students, sep="\n\n")

   Subject  Course_Code  Score Grade
1   French          201  78.90    B+
2  Spanish          301  59.76     C
3   German          401  92.66     A

  Student_Name  Course_Code
0           AA          201
1           BB          401
2           CC          301


In [83]:
pd.concat([subject,students],ignore_index=True)
# concat stacks the rows of the two frames and, with ignore_index=True, renumbers them 0..n-1.
# It is meant for frames with the same column names. These two share only Course_Code,
# so every other column is left empty (NaN) for the rows that came from the other frame.

Unnamed: 0,Subject,Course_Code,Score,Grade,Student_Name
0,French,201,78.9,B+,
1,Spanish,301,59.76,C,
2,German,401,92.66,A,
3,,201,,,AA
4,,401,,,BB
5,,301,,,CC


In [90]:
# A one-row frame with the same four columns as the subject frame.
eng_row = pd.DataFrame(
    {
        'Subject': ['English'],
        'Course_Code': [101],
        'Score': [45.67],
        'Grade': ['C'],
    }
)
eng_row

Unnamed: 0,Subject,Course_Code,Score,Grade
0,English,101,45.67,C


In [93]:
# eng_row has the same columns as subject, so it is appended as a complete new row;
# ignore_index=True renumbers the rows 0..3.
pd.concat([subject,eng_row],ignore_index=True)

Unnamed: 0,Subject,Course_Code,Score,Grade
0,French,201,78.9,B+
1,Spanish,301,59.76,C
2,German,401,92.66,A
3,English,101,45.67,C


In [88]:
# Join the two frames on their shared Course_Code column, so each subject row is
# paired with the student row carrying the same code.
subject.merge(students, on='Course_Code')

Unnamed: 0,Subject,Course_Code,Score,Grade,Student_Name
0,French,201,78.9,B+,AA
1,Spanish,301,59.76,C,CC
2,German,401,92.66,A,BB


In [95]:
# Load the customer churn CSV (relative path: the file must be in the working directory).
# NOTE: this rebinds df, which held the subjects table in the earlier cells.
df = pd.read_csv('Telco-Customer-Churn.csv')
df

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


In [97]:
# Number of rows per payment method, most frequent first
df['PaymentMethod'].value_counts()

Electronic check             2365
Mailed check                 1612
Bank transfer (automatic)    1544
Credit card (automatic)      1522
Name: PaymentMethod, dtype: int64

In [99]:
# Group the rows by PaymentMethod; the resulting GroupBy object is queried in the next cell
gb = df.groupby('PaymentMethod')

In [100]:
# First entry of each column within each payment-method group
gb.first()

Unnamed: 0_level_0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,MonthlyCharges,TotalCharges,Churn
PaymentMethod,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Bank transfer (automatic),7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,42.3,1840.75,No
Credit card (automatic),1452-KIOVK,Male,0,No,Yes,22,Yes,Yes,Fiber optic,No,Yes,No,No,Yes,No,Month-to-month,Yes,89.1,1949.4,No
Electronic check,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,29.85,29.85,No
Mailed check,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,56.95,1889.5,No


In [109]:
# Column names, non-null counts and dtypes of the churn data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [108]:
# Total monthly charges for each type of contract (sum of MonthlyCharges per Contract value)

df.groupby('Contract')['MonthlyCharges'].sum()

Contract
Month-to-month    257294.15
One year           95816.60
Two year          103005.85
Name: MonthlyCharges, dtype: float64

In [103]:
# Average monthly charge for each payment method (mean of MonthlyCharges per PaymentMethod value)

df.groupby('PaymentMethod')['MonthlyCharges'].mean()

PaymentMethod
Bank transfer (automatic)    67.192649
Credit card (automatic)      66.512385
Electronic check             76.255814
Mailed check                 43.917060
Name: MonthlyCharges, dtype: float64