# Pandas

Pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.

pandas is a NumFOCUS-sponsored project. This sponsorship helps ensure the success of pandas' development as a world-class open-source project, and makes it possible to donate to the project.

Pandas is used for data manipulation, analysis and cleaning. Python pandas is well suited for different kinds of data, such as: 

<ul><li>Tabular data with heterogeneously-typed columns</li>
    <li>Ordered and unordered time series data</li>
    <li>Arbitrary matrix data with row &amp; column labels</li>
    <li>Unlabelled data</li>
    <li>Any other form of observational or statistical data sets</li></ul>

# Python Operations

In [1]:
import pandas as pd

# Six days of website traffic, stored as one list per column.
XYZ_web = {
    'Day': [1, 2, 3, 4, 5, 6],
    "Visitors": [1000, 700, 6000, 1000, 400, 350],
    "Bounce_Rate": [20, 20, 23, 15, 10, 34],
}

# Each dict key becomes a column label; each list supplies that column's values.
df = pd.DataFrame(data=XYZ_web)

print(df)

   Day  Visitors  Bounce_Rate
0    1      1000           20
1    2       700           20
2    3      6000           23
3    4      1000           15
4    5       400           10
5    6       350           34


# Create a Pandas Series

In [3]:
import pandas as pd

# A Series is a one-dimensional labelled array; with no explicit index
# the labels default to 0, 1, 2, ...
a = pd.Series(data=[1, 2, 3, 4, 5])

# Show the Series, then its type, separated by two blank lines.
print(a, type(a), sep="\n\n\n")

0    1
1    2
2    3
3    4
4    5
dtype: int64


<class 'pandas.core.series.Series'>


In [4]:
# Look up the element whose index label is 2; with the default 0-based
# integer index this is the third value in the Series.
a[2]

3

In [5]:
# A Series can hold strings as well as numbers.
letters = ['a', 'b', 'c']
a = pd.Series(data=letters)
print(a)

0    a
1    b
2    c
dtype: object


In [8]:
# Build a daily DatetimeIndex from 1 January 2018 to 23 May 2018 (inclusive).
# Dates are written in ISO 8601 (YYYY-MM-DD) so they cannot be misread:
# pandas parses ambiguous strings month-first by default, so a day-first
# string such as '23-5-2018' only works because 23 is not a valid month.
a = pd.date_range(start='2018-01-01', end='2018-05-23')
print(a)
print("\n")
print(type(a))

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10',
               ...
               '2018-05-14', '2018-05-15', '2018-05-16', '2018-05-17',
               '2018-05-18', '2018-05-19', '2018-05-20', '2018-05-21',
               '2018-05-22', '2018-05-23'],
              dtype='datetime64[ns]', length=143, freq='D')


<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


# Pandas DataFrame

In [10]:
import numpy as np

# Three independent random samples of 20 values each.
# randint's upper bound is exclusive, so temp lies in 20..99.
temp = np.random.randint(20, 100, size=20)
name = np.random.choice(['Abhay','Teclov','Geekshub','Ankit'], size=20)
random = np.random.choice([10,11,13,12,14], size=20)

# Pair the samples up row by row: one (temp, name, random) tuple per row.
a = [row for row in zip(temp, name, random)]

# Build the frame from the list of row tuples, naming the columns explicitly.
df = pd.DataFrame(a, columns=['temp','name','random'])

# Show the frame, then its type, separated by two blank lines.
print(df, type(df), sep="\n\n\n")

    temp      name  random
0     72  Geekshub      13
1     67    Teclov      10
2     72     Ankit      14
3     69     Abhay      11
4     25  Geekshub      11
5     36    Teclov      10
6     54  Geekshub      13
7     50  Geekshub      11
8     80    Teclov      10
9     91    Teclov      11
10    53    Teclov      14
11    61     Abhay      10
12    79     Ankit      10
13    94     Abhay      12
14    22    Teclov      13
15    41    Teclov      10
16    93    Teclov      10
17    48    Teclov      12
18    41     Ankit      14
19    71    Teclov      11


<class 'pandas.core.frame.DataFrame'>


In [13]:
# Three independent random samples of 20 values each.
# randint's upper bound is exclusive, so temp lies in 20..99.
temp = np.random.randint(low = 20, high =100, size = [20,])
name = np.random.choice(['Abhay','Teclov','Geekshub','Ankit'],20)
random = np.random.choice([10,11,13,12,14],20)

# Build the frame from a dict: each key becomes a column label and each
# array supplies that column's values.
df = pd.DataFrame({'temp':temp, 'name':name, 'random':random})
print(type(df))
print("\n")
# head() and tail() return the first and last five rows by default.
print(df.head())
print("\n")
# Print the tail once only: wrapping it in a second print() would also
# print the inner call's return value, None.
print(df.tail())

<class 'pandas.core.frame.DataFrame'>


   temp      name  random
0    69     Abhay      10
1    96     Ankit      13
2    97  Geekshub      12
3    45    Teclov      12
4    44     Ankit      12


    temp      name  random
15    24    Teclov      13
16    50  Geekshub      11
17    24  Geekshub      10
18    82     Abhay      14
19    73    Teclov      12
None


In [15]:
# Report the frame's (rows, columns) shape and then its column labels,
# separated by two blank lines.
print(df.shape, df.columns, sep="\n\n\n")

(20, 3)


Index(['temp', 'name', 'random'], dtype='object')


In [16]:
# Attribute-style column access: returns the 'name' column as a Series.
df.name

0        Abhay
1        Ankit
2     Geekshub
3       Teclov
4        Ankit
5        Ankit
6        Ankit
7     Geekshub
8        Abhay
9        Ankit
10      Teclov
11       Abhay
12       Ankit
13      Teclov
14       Ankit
15      Teclov
16    Geekshub
17    Geekshub
18       Abhay
19      Teclov
Name: name, dtype: object

In [17]:
# Bracket-style column access: selects the column labelled 'name' and
# returns it as a Series.
df['name']

0        Abhay
1        Ankit
2     Geekshub
3       Teclov
4        Ankit
5        Ankit
6        Ankit
7     Geekshub
8        Abhay
9        Ankit
10      Teclov
11       Abhay
12       Ankit
13      Teclov
14       Ankit
15      Teclov
16    Geekshub
17    Geekshub
18       Abhay
19      Teclov
Name: name, dtype: object