# Pandas Tutorial
Pandas is a Python library.

Pandas is used to analyze data.

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Print the installed pandas version string (handy when reproducing results).
print(pd.__version__)

In [None]:
# Build a small table from a dict: each key becomes a column label and
# each list supplies that column's values.
mydataset = {
    'cars': ["BMW", "Volvo", "Ford"],
    'passings': [3, 7, 2],
}
myvar = pd.DataFrame(data=mydataset)

print(myvar)

# 1. Pandas Series
A Pandas Series is like a column in a table.
It is a one-dimensional array holding data of any type.

In [None]:
# A Series built from a plain list; no index is given, so pandas assigns
# the default integer labels starting at 0.
a = [1, 7, 2]
myvar = pd.Series(data=a)
print(myvar)

In [None]:
# Return the first value of the Series.
# Assumes myvar is the list-based Series from the previous cell, whose
# labels are the default integers, so label 0 is the first element.

print(myvar[0])

In [None]:
# Create labels.
# The `index` argument assigns your own label to each value, in order.
a = [1, 7, 2]
labels = ["x", "y", "z"]
myvar = pd.Series(a, index=labels)
print(myvar)

In [None]:
# Return the value stored under the label "y":
print(myvar["y"])

Key/Value Objects as Series

You can also use a key/value object, like a dictionary, when creating a Series.

Note: The keys of the dictionary become the labels.

In [None]:
# Series from a dictionary: the keys become the index labels and the
# values become the data.
calories = {
    "satarday": 420,
    "sunday": 380,
    "Monday": 390,
}
myvar = pd.Series(data=calories)
print(myvar)

In [None]:
# Passing `index` together with a dictionary keeps only the listed keys,
# here "satarday" and "Monday".
calories = {
    "satarday": 420,
    "sunday": 380,
    "Monday": 390,
}
wanted_days = ["satarday", "Monday"]
myvar = pd.Series(calories, index=wanted_days)

print(myvar)

# 2. DataFrames

A Pandas DataFrame is a two-dimensional data structure, like a two-dimensional array or a table with rows and columns.

In [None]:
# Create a DataFrame from a dict of two equal-length lists; each key
# becomes a column label.
data = {
    "calories": [420, 380, 390],
    "duration": [50, 40, 45],
}
df = pd.DataFrame(data=data)
print(df)

In [None]:
# Locate a row.
# The loc attribute returns one or more rows selected by index label.
first_row = df.loc[0]  # refer to the row by its index label
print(first_row)

In [None]:
# Return rows 0 and 1 by handing loc a list of index labels.
row_labels = [0, 1]
print(df.loc[row_labels])

# 3. Pandas Read CSV

In [None]:
# Load the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='data.csv')
# Tip: to_string() renders the entire DataFrame as text before printing.
full_table = df.to_string()
print(full_table)

In [None]:
# Show the DataFrame as the cell's output value, without calling to_string().
df

In [None]:
# Print the DataFrame's plain-text representation.
print(df)

In [None]:
# List the column labels of the DataFrame.
df.columns

 Inplace
 
 `inplace` is a parameter accepted by many pandas methods, such as set_index(), dropna(), fillna(), reset_index(), drop(), replace() and many more. Its default value is False, in which case the original object is left unchanged and a modified copy is returned.

 inplace: if True, the changes are made directly in the original DataFrame (and the method returns None).

In [None]:
# With inplace=False, rename() does not change `df`; the renamed result is
# not assigned to anything here, so it is simply discarded.
df.rename(columns={'Calories':'Calorie'},inplace=False)
# Showing df afterwards confirms the original column names are untouched.
df


In [None]:
# With inplace=True, rename() modifies `df` itself: the 'Calories' column of
# the original DataFrame is now called 'Calorie'.
df.rename(columns={'Calories':'Calorie'},inplace=True)

In [None]:
# Look up how many rows pandas will display before it truncates the output.
max_rows_shown = pd.get_option("display.max_rows")
print(max_rows_shown)

iloc() & loc()

The pandas `iloc` indexer lets us select a particular cell of a dataset, that is, the value found at a given row and column position of a DataFrame. Note that `iloc` is used with square brackets, `iloc[...]`; it is an indexer, not a function.

With `iloc[]`, we can retrieve the value at a particular row and column using their integer positions.

Remember, `iloc[]` accepts only integer positions (single integers, lists of integers or integer slices) to identify the values to be accessed and displayed; it does not accept labels.

Syntax

dataframe.iloc[row, column]

In [None]:
# Select every row and the columns at integer positions 1, 2 and 3
# (the stop position 4 is excluded).
r1 = df.iloc[:, slice(1, 4)]
r1

In [None]:
# Select every row and the columns at integer positions 0, 1 and 2 with iloc
# (the stop position 3 is excluded).
r2 = df.iloc[:, slice(0, 3)]
r2

In [None]:
# r1 and r2 cover different column ranges, so the element-wise `r1 == r2`
# raises ValueError (DataFrames must be identically labelled to be compared).
# equals() answers "are these two frames the same?" with a single bool.
r1.equals(r2)

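DataFrame.ix[ ] 
was both a label- and integer-based slicing technique. It was deprecated in pandas 0.20 and removed in pandas 1.0; use `.loc[ ]` for label-based and `.iloc[ ]` for integer-position-based selection instead.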

In [None]:
# Slice rows labelled 0 through 4 and columns 'Duration' through 'Pulse'.
# The .ix indexer was removed in pandas 1.0, so this uses .loc, its
# label-based replacement; label slices include both end points.
x2 = df.loc[:4, 'Duration':'Pulse']
x2

In [None]:
# df.drop(df.loc[:, 'Duration': 'Pulse'].columns, axis = 1)  # .loc replaces .ix, which was removed in pandas 1.0

In [None]:
# Raise the row display limit so that printing shows the entire DataFrame.
pd.set_option("display.max_rows", 9999)
df = pd.read_csv(filepath_or_buffer='data.csv')

print(df)

# 4. Pandas Read JSON
JSON = Python Dictionary

JSON objects have the same format as Python dictionaries.

If your JSON code is not in a file, but in a Python Dictionary, you can load it into a DataFrame directly

In [None]:

# A JSON-like nested dict: each outer key is a column label, and each inner
# dict maps a row label (a string) to that column's value.
data = {
    "Duration": {"0": 60, "1": 60, "2": 60, "3": 45, "4": 45, "5": 60},
    "Pulse": {"0": 110, "1": 117, "2": 103, "3": 109, "4": 117, "5": 102},
    "Maxpulse": {"0": 130, "1": 145, "2": 135, "3": 175, "4": 148, "5": 127},
    "Calories": {"0": 409, "1": 479, "2": 340, "3": 282, "4": 406, "5": 300},
}

# Load the dict straight into a DataFrame.
dj = pd.DataFrame(data=data)

print(dj)


In [None]:
# Save the DataFrame to the file 'data.json' in JSON format.
dj.to_json('data.json')

In [None]:
# Print the DataFrame's plain-text representation.
print(dj)

In [None]:
# Read the JSON file back into a DataFrame and print all of it.
dj = pd.read_json('data.json')
json_table = dj.to_string()
print(json_table)

In [None]:
# Preview the first few rows of dj.
dj.head()

In [None]:
# Preview the last few rows of df.
df.tail()

In [None]:
# info() prints its summary itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
df.info()

# 5. drop one or multiple columns in Pandas Dataframe

Syntax:
DataFrame.drop(labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise')

labels: String or list of strings referring row or column name.

axis: int or string value; 0 or 'index' for rows, 1 or 'columns' for columns.

index or columns: Single label or list. Passing `index` or `columns` is an alternative to passing `labels` together with `axis`; the two styles cannot be used together.

In [None]:
# Reload the CSV so the drop examples start from the full set of columns.
# Use the same lowercase file name as the earlier cells: on case-sensitive
# file systems 'Data.csv' and 'data.csv' are different files.
df = pd.read_csv('data.csv')
df

In [None]:
# List the column labels that are available to drop.
df.columns

In [None]:
# Remove one specific column by name; the result is stored in x.
x = df.drop(columns=['Calories'])
x

In [None]:
# Remove several specific columns by name in one call; the result is stored in y.
y = df.drop(columns=['Calories', 'Pulse'])
y

In [None]:
# Remove columns by position (here the first two).
# drop(..., inplace=True) returns None, which left z empty, and it removed
# the columns from df itself even though later cells still refer to them.
# Keep the returned copy instead and leave df as it is.
z = df.drop(columns=df.columns[[0, 1]])
z

In [None]:
# Remove the column at integer position 2.
# Pass the column labels (rather than a DataFrame slice) and keep the result
# in its own variable, so df still has every column the following cells use.
df_without_third_column = df.drop(columns=df.columns[2:3])

df_without_third_column

In [None]:
# Remove every column from 'Pulse' through 'Maxpulse'; the label slice
# includes both end points.
cols_to_remove = df.loc[:, 'Pulse':'Maxpulse'].columns
df.drop(columns=cols_to_remove)

In [None]:
# Remove every column whose name contains 'Maxpulse'.
# Collect the matching labels first and drop them in one call, instead of
# deleting columns from the frame while looping over its columns.
maxpulse_columns = [col for col in df.columns if 'Maxpulse' in col]
df = df.drop(columns=maxpulse_columns)

df

In [None]:
# Return the data type of each column.
df.dtypes

value_counts()

In [None]:

# value_counts() counts how many times each distinct value occurs in the
# 'Duration' column.
df['Duration'].value_counts()

unique()

In [48]:

# unique() returns the distinct values found in the 'Duration' column.
df['Duration'].unique()

array([ 60,  45,  30,  80,  20, 210, 160, 180, 150, 300,  90, 120, 270,
        15,  25,  75], dtype=int64)