In [6]:
import pandas as pd
import numpy as np

# pandas provides two core data structures:
#   1) Series
#   2) DataFrame

In [4]:
# A pandas Series is a one-dimensional labelled array. When no index is
# supplied, the rows get the default integer labels 0, 1, 2, ...
series_list = pd.Series(data=[1, 2, 3, 4, 5, 6])
series_list

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [8]:
# A Series can be created with custom row labels by passing `index=`.
# Here the values 10..60 are labelled with the even numbers 0..10.
series_index = pd.Series(
    data=np.arange(10, 70, 10),
    index=np.arange(0, 12, 2),
)
series_index

0     10
2     20
4     30
6     40
8     50
10    60
dtype: int64

In [10]:
# Row labels do not have to be numbers: here a NumPy array of strings is the index.
series_index = pd.Series(
    data=np.arange(10, 70, 10),
    index=np.array(list('abcdef')),
)
series_index

a    10
b    20
c    30
d    40
e    50
f    60
dtype: int64

In [17]:
# Plain Python lists work just as well as NumPy arrays for both values and labels.
series_index = pd.Series(
    data=list(range(10, 70, 10)),
    index=list('abcdef'),
)
series_index

a    10
b    20
c    30
d    40
e    50
f    60
dtype: int64

In [18]:
# The `.index` attribute holds the row labels of the Series.
series_index.index

Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype='object')

In [20]:
# The row labels of an existing Series can be replaced by assigning to `.index`.
series_index.index = np.array([
    'Abhay',
    'Anjani',
    'Akash',
    'Rumman',
    'Adarsh',
    'HnCC',
])
print(series_index)

Abhay     10
Anjani    20
Akash     30
Rumman    40
Adarsh    50
HnCC      60
dtype: int64


In [25]:
# A Series can also be built from a Python dictionary:
# the keys become the row labels and the values become the data.
py_d = dict(a=100, b=1245, c=1425, d=125462)
series_d = pd.Series(data=py_d)
print(series_d)

a       100
b      1245
c      1425
d    125462
dtype: int64


In [30]:
# Dictionary values may themselves be lists; each list is then stored as a
# single element, so the resulting Series has dtype `object`.
list_d = dict(
    a=[12, 3, 4, 5],
    b=[41, 25, 36],
    c=[1],
)
series_list = pd.Series(data=list_d)
print(series_list)

a    [12, 3, 4, 5]
b     [41, 25, 36]
c              [1]
dtype: object


# DataFrame

In [41]:
# A DataFrame is a two-dimensional labelled table held in memory, much like a spreadsheet.
# Each dictionary key becomes a column name and its list becomes that column's values.
my_dict = dict(
    name=list("abcdefg"),
    age=[20, 27, 35, 55, 18, 21, 35],
    designation=["VP", "CEO", "CFO", "VP", "VP", "CEO", "MD"],
)
my_dataframe = pd.DataFrame(
    data=my_dict,
    index=['Adarsh', 'Anjani', 'HnCC', 'Hanzla', 'Abhay', 'Rumman', 'Akash'],  # row labels
)
print(my_dataframe)


       name  age designation
Adarsh    a   20          VP
Anjani    b   27         CEO
HnCC      c   35         CFO
Hanzla    d   55          VP
Abhay     e   18          VP
Rumman    f   21         CEO
Akash     g   35          MD


In [42]:
# The `.index` attribute holds the row labels of the DataFrame.
my_dataframe.index 

Index(['Adarsh', 'Anjani', 'HnCC', 'Hanzla', 'Abhay', 'Rumman', 'Akash'], dtype='object')

In [43]:
# The row labels of a DataFrame can be replaced by assigning to `.index`.
my_dataframe.index = np.array(list('abcdefg'))
my_dataframe

Unnamed: 0,name,age,designation
a,a,20,VP
b,b,27,CEO
c,c,35,CFO
d,d,55,VP
e,e,18,VP
f,f,21,CEO
g,g,35,MD


In [45]:
# A single column can be selected by name; the result is a Series.
my_dataframe['age'] # dictionary-like syntax

a    20
b    27
c    35
d    55
e    18
f    21
g    35
Name: age, dtype: int64

In [46]:
# Attribute-style access selects the same 'age' column as my_dataframe['age'].
my_dataframe.age

a    20
b    27
c    35
d    55
e    18
f    21
g    35
Name: age, dtype: int64

# All values in a DataFrame column share the same dtype.

In [50]:
# `.dtypes` lists the data type of every column.
my_dataframe.dtypes

name           object
age             int64
designation    object
dtype: object

# Working on Huge DataSet

In [52]:
# Load the CSV file 'data.csv' (path relative to the working directory) into a DataFrame.
data = pd.read_csv('data.csv')

In [53]:
# Display the loaded frame; for large frames the notebook shows only the first and last rows.
data

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,3.130000e+05,3.0,1.50,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 00:00:00,2.384000e+06,5.0,2.50,3650,9050,2.0,0,4,5,3370,280,1921,0,709 W Blaine St,Seattle,WA 98119,USA
2,2014-05-02 00:00:00,3.420000e+05,3.0,2.00,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
3,2014-05-02 00:00:00,4.200000e+05,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
4,2014-05-02 00:00:00,5.500000e+05,4.0,2.50,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4595,2014-07-09 00:00:00,3.081667e+05,3.0,1.75,1510,6360,1.0,0,0,4,1510,0,1954,1979,501 N 143rd St,Seattle,WA 98133,USA
4596,2014-07-09 00:00:00,5.343333e+05,3.0,2.50,1460,7573,2.0,0,0,3,1460,0,1983,2009,14855 SE 10th Pl,Bellevue,WA 98007,USA
4597,2014-07-09 00:00:00,4.169042e+05,3.0,2.50,3010,7014,2.0,0,0,3,3010,0,2009,0,759 Ilwaco Pl NE,Renton,WA 98059,USA
4598,2014-07-10 00:00:00,2.034000e+05,4.0,2.00,2090,6630,1.0,0,0,3,1070,1020,1974,0,5148 S Creston St,Seattle,WA 98178,USA


In [55]:
data.head() # returns the first 5 rows by default

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 00:00:00,2384000.0,5.0,2.5,3650,9050,2.0,0,4,5,3370,280,1921,0,709 W Blaine St,Seattle,WA 98119,USA
2,2014-05-02 00:00:00,342000.0,3.0,2.0,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
3,2014-05-02 00:00:00,420000.0,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
4,2014-05-02 00:00:00,550000.0,4.0,2.5,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA


In [56]:
data.head(10) # pass n to choose how many rows from the top to return

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 00:00:00,2384000.0,5.0,2.5,3650,9050,2.0,0,4,5,3370,280,1921,0,709 W Blaine St,Seattle,WA 98119,USA
2,2014-05-02 00:00:00,342000.0,3.0,2.0,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
3,2014-05-02 00:00:00,420000.0,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
4,2014-05-02 00:00:00,550000.0,4.0,2.5,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA
5,2014-05-02 00:00:00,490000.0,2.0,1.0,880,6380,1.0,0,0,3,880,0,1938,1994,522 NE 88th St,Seattle,WA 98115,USA
6,2014-05-02 00:00:00,335000.0,2.0,2.0,1350,2560,1.0,0,0,3,1350,0,1976,0,2616 174th Ave NE,Redmond,WA 98052,USA
7,2014-05-02 00:00:00,482000.0,4.0,2.5,2710,35868,2.0,0,0,3,2710,0,1989,0,23762 SE 253rd Pl,Maple Valley,WA 98038,USA
8,2014-05-02 00:00:00,452500.0,3.0,2.5,2430,88426,1.0,0,0,4,1570,860,1985,0,46611-46625 SE 129th St,North Bend,WA 98045,USA
9,2014-05-02 00:00:00,640000.0,4.0,2.0,1520,6200,1.5,0,0,3,1520,0,1945,2010,6811 55th Ave NE,Seattle,WA 98115,USA


In [57]:
# tail() is the counterpart of head(): it returns the last 5 rows by default.
data.tail()

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
4595,2014-07-09 00:00:00,308166.666667,3.0,1.75,1510,6360,1.0,0,0,4,1510,0,1954,1979,501 N 143rd St,Seattle,WA 98133,USA
4596,2014-07-09 00:00:00,534333.333333,3.0,2.5,1460,7573,2.0,0,0,3,1460,0,1983,2009,14855 SE 10th Pl,Bellevue,WA 98007,USA
4597,2014-07-09 00:00:00,416904.166667,3.0,2.5,3010,7014,2.0,0,0,3,3010,0,2009,0,759 Ilwaco Pl NE,Renton,WA 98059,USA
4598,2014-07-10 00:00:00,203400.0,4.0,2.0,2090,6630,1.0,0,0,3,1070,1020,1974,0,5148 S Creston St,Seattle,WA 98178,USA
4599,2014-07-10 00:00:00,220600.0,3.0,2.5,1490,8102,2.0,0,0,4,1490,0,1990,0,18717 SE 258th St,Covington,WA 98042,USA


In [58]:
# Pass n to choose how many rows from the bottom to return.
data.tail(8)

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
4592,2014-07-08 00:00:00,252980.0,4.0,2.5,2530,8169,2.0,0,0,3,2530,0,1993,0,37654 18th Pl S,Federal Way,WA 98003,USA
4593,2014-07-08 00:00:00,289373.307692,3.0,2.5,2538,4600,2.0,0,0,3,2538,0,2013,1923,5703 Charlotte Ave SE,Auburn,WA 98092,USA
4594,2014-07-09 00:00:00,210614.285714,3.0,2.5,1610,7223,2.0,0,0,3,1610,0,1994,0,26306 127th Ave SE,Kent,WA 98030,USA
4595,2014-07-09 00:00:00,308166.666667,3.0,1.75,1510,6360,1.0,0,0,4,1510,0,1954,1979,501 N 143rd St,Seattle,WA 98133,USA
4596,2014-07-09 00:00:00,534333.333333,3.0,2.5,1460,7573,2.0,0,0,3,1460,0,1983,2009,14855 SE 10th Pl,Bellevue,WA 98007,USA
4597,2014-07-09 00:00:00,416904.166667,3.0,2.5,3010,7014,2.0,0,0,3,3010,0,2009,0,759 Ilwaco Pl NE,Renton,WA 98059,USA
4598,2014-07-10 00:00:00,203400.0,4.0,2.0,2090,6630,1.0,0,0,3,1070,1020,1974,0,5148 S Creston St,Seattle,WA 98178,USA
4599,2014-07-10 00:00:00,220600.0,3.0,2.5,1490,8102,2.0,0,0,4,1490,0,1990,0,18717 SE 258th St,Covington,WA 98042,USA


In [60]:
data.describe() # returns summary statistics (count, mean, std, min, quartiles, max) for the numeric columns

Unnamed: 0,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated
count,4600.0,4600.0,4600.0,4600.0,4600.0,4600.0,4600.0,4600.0,4600.0,4600.0,4600.0,4600.0,4600.0
mean,551963.0,3.40087,2.160815,2139.346957,14852.52,1.512065,0.007174,0.240652,3.451739,1827.265435,312.081522,1970.786304,808.608261
std,563834.7,0.908848,0.783781,963.206916,35884.44,0.538288,0.084404,0.778405,0.67723,862.168977,464.137228,29.731848,979.414536
min,0.0,0.0,0.0,370.0,638.0,1.0,0.0,0.0,1.0,370.0,0.0,1900.0,0.0
25%,322875.0,3.0,1.75,1460.0,5000.75,1.0,0.0,0.0,3.0,1190.0,0.0,1951.0,0.0
50%,460943.5,3.0,2.25,1980.0,7683.0,1.5,0.0,0.0,3.0,1590.0,0.0,1976.0,0.0
75%,654962.5,4.0,2.5,2620.0,11001.25,2.0,0.0,0.0,4.0,2300.0,610.0,1997.0,1999.0
max,26590000.0,9.0,8.0,13540.0,1074218.0,3.5,1.0,4.0,5.0,9410.0,4820.0,2014.0,2014.0


In [62]:
# We can perform operations on a single column, e.g. the mean of 'price'.
data.price.mean()

551962.9884732141

In [64]:
# Passing a list of column names selects several columns at once,
# producing a DataFrame that contains only those columns.
data_of_use = ['price' ,'bedrooms','floors']
new_data = data[data_of_use]
print(new_data)

             price  bedrooms  floors
0     3.130000e+05       3.0     1.5
1     2.384000e+06       5.0     2.0
2     3.420000e+05       3.0     1.0
3     4.200000e+05       3.0     1.0
4     5.500000e+05       4.0     1.0
...            ...       ...     ...
4595  3.081667e+05       3.0     1.0
4596  5.343333e+05       3.0     2.0
4597  4.169042e+05       3.0     2.0
4598  2.034000e+05       4.0     1.0
4599  2.206000e+05       3.0     2.0

[4600 rows x 3 columns]


In [65]:
# `.size` is the total number of cells (rows x columns).
data.size

82800

In [66]:
# `.shape` is a (rows, columns) tuple.
data.shape

(4600, 18)

In [68]:
data[0:100:5] # row slicing works on a DataFrame (start:stop:step): every 5th row from position 0 up to, not including, 100

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
5,2014-05-02 00:00:00,490000.0,2.0,1.0,880,6380,1.0,0,0,3,880,0,1938,1994,522 NE 88th St,Seattle,WA 98115,USA
10,2014-05-02 00:00:00,463000.0,3.0,1.75,1710,7320,1.0,0,0,3,1710,0,1948,1994,Burke-Gilman Trail,Lake Forest Park,WA 98155,USA
15,2014-05-02 00:00:00,242500.0,3.0,1.5,1200,9720,1.0,0,0,4,1200,0,1965,0,14034 SE 201st St,Kent,WA 98042,USA
20,2014-05-02 00:00:00,750000.0,3.0,1.75,2240,10578,2.0,0,0,5,1550,690,1923,0,3225 NE 92nd St,Seattle,WA 98115,USA
25,2014-05-02 00:00:00,285000.0,3.0,2.5,2090,10834,1.0,0,0,4,1360,730,1987,0,27736 23rd Avenue South,Federal Way,WA 98003,USA
30,2014-05-02 00:00:00,382500.0,4.0,1.75,1560,8700,1.0,0,0,4,1560,0,1967,0,14104 119th Ave NE,Kirkland,WA 98034,USA
35,2014-05-02 00:00:00,604000.0,3.0,2.5,3240,33151,2.0,0,2,3,3240,0,1995,0,30822 36th Ct SW,Federal Way,WA 98023,USA
40,2014-05-02 00:00:00,335000.0,3.0,2.25,1580,16215,1.0,0,0,4,1580,0,1978,2000,4460 332nd Ave SE,Fall City,WA 98024,USA
45,2014-05-02 00:00:00,315000.0,3.0,1.0,1160,9180,1.0,0,0,3,1160,0,1968,1997,15804 198th Pl NE,Woodinville,WA 98077,USA


In [72]:
# DataFrame.drop returns a NEW frame and leaves `data` untouched, so the result
# has to be captured in a variable to be of any use.
# `columns=` says explicitly that a column is being dropped; the old positional
# axis argument (`data.drop('bedrooms', 1)`) is keyword-only in pandas 2.0+ and
# raises a TypeError there.
data_without_bedrooms = data.drop(columns='bedrooms')
data.head()  # the original frame still contains the 'bedrooms' column

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 00:00:00,2384000.0,5.0,2.5,3650,9050,2.0,0,4,5,3370,280,1921,0,709 W Blaine St,Seattle,WA 98119,USA
2,2014-05-02 00:00:00,342000.0,3.0,2.0,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
3,2014-05-02 00:00:00,420000.0,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
4,2014-05-02 00:00:00,550000.0,4.0,2.5,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA


In [76]:
# Rows are dropped by label with `index=`. DataFrame.drop returns a NEW frame,
# so the result is captured; `data` itself is not modified.
# The keyword form replaces the positional axis argument (`data.drop(1, 0)`),
# which is keyword-only in pandas 2.0+ and raises a TypeError there.
data_without_row_1 = data.drop(index=1)
data.head()  # row 1 is still present in the original frame

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 00:00:00,2384000.0,5.0,2.5,3650,9050,2.0,0,4,5,3370,280,1921,0,709 W Blaine St,Seattle,WA 98119,USA
2,2014-05-02 00:00:00,342000.0,3.0,2.0,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
3,2014-05-02 00:00:00,420000.0,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
4,2014-05-02 00:00:00,550000.0,4.0,2.5,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA
