In [None]:
# Pandas - Python Data Analysis Tool
# Pandas is an open-source Python library providing high-performance
# data manipulation and analysis tools,
# including tools for loading data into in-memory data objects from different file formats
# Pandas deals with the following data structures
# a.) Series
# b.) DataFrame

# Series is a one-dimensional labeled array capable of holding data of any type

# A DataFrame is a two-dimensional data structure, i.e.,
# data is aligned in a tabular fashion in rows and columns


In [None]:
# Series
import pandas as pd
import numpy as np

In [None]:
# Build a one-dimensional NumPy array of five integers and show it.
arr = np.array([10, 20, 30, 40, 50])
print(arr)

[10 20 30 40 50]


In [None]:
# Wrap the NumPy array in a Series; with no index given, pandas labels
# the entries 0, 1, 2, ... as the printed result shows.
series = pd.Series(data=arr)
print(series)

0    10
1    20
2    30
3    40
4    50
dtype: int64


In [None]:
# Series with custom string labels: each dict key becomes the index label
# of the value it maps to (insertion order is preserved).
series = pd.Series({
    'firstPersonAge': 13,
    'secondPersonAge': 27,
    'thirdPersonAge': 45,
    'forthPersonAge': 67,
})
print(series)

# Look values up by label, either with [] or with .get().
print(series['firstPersonAge'])
print(series['forthPersonAge'])
print(series.get('secondPersonAge'))

firstPersonAge     13
secondPersonAge    27
thirdPersonAge     45
forthPersonAge     67
dtype: int64
13
67
27


In [None]:
# Iterating a Series directly yields only its values, not the labels.
for age in series:
    print(age)

13
27
45
67


In [None]:
# Walk the Series as (label, value) tuples.
# Series.iteritems() was deprecated in pandas 1.5 and removed in 2.0, so it
# raises AttributeError on current pandas; Series.items() yields the same
# (label, value) pairs and also works on older versions.
for label_and_age in series.items():
    print(label_and_age)

('firstPersonAge', 13)
('secondPersonAge', 27)
('thirdPersonAge', 45)
('forthPersonAge', 67)


In [None]:
# DataFrame
# Column-oriented student records: one key per column, one list entry per row.
stInfo = {
    'Id': [1, 2, 3, 4],
    'Name': ['Tom', 'Alex', 'Smith', 'John'],
    'Course': ['AI', 'ML', 'DS', 'DL'],
}
print(stInfo)

{'Id': [1, 2, 3, 4], 'Name': ['Tom', 'Alex', 'Smith', 'John'], 'Course': ['AI', 'ML', 'DS', 'DL']}


In [None]:
# Turn the dict of lists into a table: each key becomes a column and the
# rows get the default integer index.
data = pd.DataFrame(data=stInfo)
data

Unnamed: 0,Id,Name,Course
0,1,Tom,AI
1,2,Alex,ML
2,3,Smith,DS
3,4,John,DL


In [None]:
# Same idea, but with explicit row labels instead of the default 0..n-1 index.
data = dict(
    calories=[450, 345, 190],
    duration=[45, 30, 15],
)
df = pd.DataFrame(data=data, index=['Day1', 'Day2', 'Day3'])
df  # bare expression -> rich table display (compare with print(df))

Unnamed: 0,calories,duration
Day1,450,45
Day2,345,30
Day3,190,15


In [None]:
# Using Pandas To Deal With Different File Formats

# Read an Excel file with pd.read_excel(); the workbook is fetched
# straight from the URL.
# NOTE(review): the recorded result shows the sheet's "Test Data" title row
# being used as the header (columns come out as "Unnamed: 1", ...), with
# Rno/Name/Marks as the first data row. Passing header=1 would probably
# fix that; confirm against the workbook before changing.
data = pd.read_excel(
    io='https://trello-attachments.s3.amazonaws.com/600d1f10d700af20b1924b3c/600d1f700fcd073384905133/7c2b320aa5f10d0efa16ee38b9d53587/excelData.xlsx'
)
data

Unnamed: 0,Test Data,Unnamed: 1,Unnamed: 2
0,Rno,Name,Marks
1,1,John,34
2,2,Smith,56
3,3,Shahrukh,78
4,4,Nasir,54


In [None]:
# Open the workbook as an ExcelFile handle, then load the sheet named
# 'Sheet2' from that handle.
excel_file = pd.ExcelFile(
    'https://trello-attachments.s3.amazonaws.com/600d1f10d700af20b1924b3c/600d1f700fcd073384905133/7c2b320aa5f10d0efa16ee38b9d53587/excelData.xlsx'
)
data = pd.read_excel(excel_file, sheet_name='Sheet2')
data

Unnamed: 0,Sno,Course,Semester
0,1,MCA,5
1,2,MBA,7
2,3,BCA,2
3,4,Btech,1


In [None]:
# Read a CSV (Comma-Separated Values) file with pd.read_csv().
# The file is fetched directly from the URL.
data = pd.read_csv(
    'https://trello-attachments.s3.amazonaws.com/6062c6af283e86843a32b025/60d0152706cf05391bac797c/0ced7d9ccd720cb82d87e6ff95ee701c/Startups_Data.csv'
)
data


Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94
5,131876.9,99814.71,362861.36,New York,156991.12
6,134615.46,147198.87,127716.82,California,156122.51
7,130298.13,145530.06,323876.68,Florida,155752.6
8,120542.52,148718.95,311613.29,New York,152211.77
9,123334.88,108679.17,304981.62,California,149759.96


In [None]:
# Read a tab-separated text file.
# pd.read_table() is the read_csv parser with '\t' as its default separator.
data = pd.read_table(
    'https://trello-attachments.s3.amazonaws.com/600d1f10d700af20b1924b3c/600d1f700fcd073384905133/a338744c6a4359e2de64a834473f41a9/Iris.txt'
)
data

Unnamed: 0,SL,SW,PL,PW,CLASS
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [None]:
# Colab-specific: upload a file from the local machine into the notebook
# runtime. The recorded output shows telecom_churn.csv being saved.
# NOTE(review): this cell only works inside Google Colab and needs manual
# interaction, so it will not survive an unattended "Run All" elsewhere.
from google.colab import files
upload = files.upload()  # presumably describes the uploaded file(s); confirm against the Colab docs

Saving telecom_churn.csv to telecom_churn.csv


In [None]:
# IPython shell escape: list the working directory to confirm the upload
# landed (the recorded output shows sample_data and telecom_churn.csv).
!ls # List The Files

sample_data  telecom_churn.csv


In [None]:
# Load the uploaded CSV from the working directory into a DataFrame.
data = pd.read_csv(filepath_or_buffer='telecom_churn.csv')
data

Unnamed: 0,state,account length,area code,phone number,international plan,voice mail plan,number vmail messages,total day minutes,total day calls,total day charge,total eve minutes,total eve calls,total eve charge,total night minutes,total night calls,total night charge,total intl minutes,total intl calls,total intl charge,customer service calls,churn
0,KS,128,415,382-4657,no,yes,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.70,1,False
1,OH,107,415,371-7191,no,yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.70,1,False
2,NJ,137,415,358-1921,no,no,0,243.4,114,41.38,121.2,110,10.30,162.6,104,7.32,12.2,5,3.29,0,False
3,OH,84,408,375-9999,yes,no,0,299.4,71,50.90,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False
4,OK,75,415,330-6626,yes,no,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3328,AZ,192,415,414-4276,no,yes,36,156.2,77,26.55,215.5,126,18.32,279.1,83,12.56,9.9,6,2.67,2,False
3329,WV,68,415,370-3271,no,no,0,231.1,57,39.29,153.4,55,13.04,191.3,123,8.61,9.6,4,2.59,3,False
3330,RI,28,510,328-8230,no,no,0,180.8,109,30.74,288.8,58,24.55,191.9,91,8.64,14.1,6,3.81,2,False
3331,CT,184,510,364-6381,yes,no,0,213.8,105,36.35,159.6,84,13.57,139.2,137,6.26,5.0,10,1.35,2,False
