In [None]:
#@title Copyright 2020 Google LLC. Double-click here for license information.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category from this point on (no category or module filter is given).
warnings.filterwarnings('ignore')

In [None]:
# Mount Google Drive at /content/drive (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')
# Read the dataset into `data`. The path is relative, so it is resolved against
# the current working directory rather than the Drive mount point above.
# NOTE(review): if the CSV lives on Drive, this presumably needs a path under
# /content/drive -- confirm where the file is expected to be.
data = pd.read_csv("Shark_Tank_India_S1.csv")

In [None]:
# Display the DataFrame itself as the cell output.
data

In [None]:
# Preview the first rows (head() defaults to 5 rows).
data.head()

In [None]:
# Preview the last rows (tail() defaults to 5 rows).
data.tail()

In [None]:
# (number of rows, number of columns) of the dataset.
data.shape

In [None]:
# Column labels available in the dataset.
data.columns

In [None]:
# Print per-column dtype and non-null count, plus memory usage.
data.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) using describe()'s defaults.
data.describe()

In [None]:
# Number of missing values in each column.
data.isnull().sum()

In [None]:
# Frequency of each distinct value in the deal_offered column.
data['deal_offered'].value_counts()

In [None]:
# Frequency of each distinct value in the aman_present column.
data['aman_present'].value_counts()

In [None]:
# Frequency of each distinct value in the anupam_present column.
data['anupam_present'].value_counts()

In [None]:
# Frequency of each distinct value in the ashneer_present column.
data['ashneer_present'].value_counts()

In [None]:
# Frequency of each distinct value in the ghazal_present column.
data['ghazal_present'].value_counts()

In [None]:
# Frequency of each distinct value in the peyush_present column.
data['peyush_present'].value_counts()

In [None]:
# Frequency of each distinct value in the namita_present column.
# NOTE(review): there is no matching inspection cell for vineeta_present,
# although that column is counted further down -- confirm whether one is wanted.
data['namita_present'].value_counts()

In [None]:
# For each person, count the rows whose `<name>_present` flag equals 1.
# Summing the boolean mask gives the same number as the length of the filtered frame.
aman_present = int(data["aman_present"].eq(1).sum())
anupam_present = int(data["anupam_present"].eq(1).sum())
ashneer_present = int(data["ashneer_present"].eq(1).sum())
ghazal_present = int(data["ghazal_present"].eq(1).sum())
namita_present = int(data["namita_present"].eq(1).sum())
peyush_present = int(data["peyush_present"].eq(1).sum())
vineeta_present = int(data["vineeta_present"].eq(1).sum())

In [None]:
# Bar chart of the presence counts, one bar per person.
names = ['Aman', 'Anupam', 'Ashneer', 'Ghazal', 'Namita', 'Peyush', 'Vineeta']
present = [
    aman_present,
    anupam_present,
    ashneer_present,
    ghazal_present,
    namita_present,
    peyush_present,
    vineeta_present,
]

fig = plt.figure(figsize=(10, 5))
plt.bar(names, present, width=0.4, color='maroon')
# Label the axes created by plt.bar in a single call.
plt.gca().set(
    xlabel="Candidate Name",
    ylabel="Candidate Present",
    title="Candidate Present vs Name",
)
plt.show()

In [None]:
# Pie chart of the presence counts; autopct prints each wedge's share with one decimal.
plt.pie(present, labels=names, autopct='%0.1f%%', radius=2.0)
# The title is positioned by hand (x/y in figure coordinates),
# presumably so it clears the enlarged pie.
plt.suptitle(
    'Percentage of candidates present',
    x=0.55,
    y=1.15,
    color='red',
)
plt.show()

In [None]:
# Frequency of each distinct value in the aman_invested column.
data['aman_invested'].value_counts()

In [None]:
# Frequency of each distinct value in the anupam_invested column.
data['anupam_invested'].value_counts()

In [None]:
# Frequency of each distinct value in the ashneer_invested column.
data['ashneer_invested'].value_counts()

In [None]:
# Frequency of each distinct value in the ghazal_invested column.
data['ghazal_invested'].value_counts()

In [None]:
# Frequency of each distinct value in the namita_invested column.
data['namita_invested'].value_counts()

In [None]:
# Frequency of each distinct value in the peyush_invested column.
data['peyush_invested'].value_counts()

In [None]:
# Frequency of each distinct value in the vineeta_invested column.
data['vineeta_invested'].value_counts()

In [None]:
# For each person, count the rows whose `<name>_invested` flag equals 1.
# Summing the boolean mask gives the same number as the length of the filtered frame.
aman_invested = int(data["aman_invested"].eq(1).sum())
anupam_invested = int(data["anupam_invested"].eq(1).sum())
ashneer_invested = int(data["ashneer_invested"].eq(1).sum())
ghazal_invested = int(data["ghazal_invested"].eq(1).sum())
namita_invested = int(data["namita_invested"].eq(1).sum())
peyush_invested = int(data["peyush_invested"].eq(1).sum())
vineeta_invested = int(data["vineeta_invested"].eq(1).sum())

In [None]:
# Bar chart of the investment counts, one bar per person.
names = ['Aman', 'Anupam', 'Ashneer', 'Ghazal', 'Namita', 'Peyush', 'Vineeta']
invested = [
    aman_invested,
    anupam_invested,
    ashneer_invested,
    ghazal_invested,
    namita_invested,
    peyush_invested,
    vineeta_invested,
]

fig = plt.figure(figsize=(10, 5))
plt.bar(names, invested, width=0.4, color='orange')
# Label the axes created by plt.bar in a single call.
plt.gca().set(
    xlabel="Candidate Name",
    ylabel="Candidate Invested",
    title="Candidate Invested vs Name",
)
plt.show()

In [None]:
# Pie chart of the investment counts; every one of the seven wedges is
# pulled out from the centre by the same offset.
plt.pie(
    invested,
    labels=names,
    autopct='%0.1f%%',
    radius=2.0,
    explode=[0.25] * 7,
)
# The title is positioned by hand (x/y in figure coordinates),
# presumably so it clears the enlarged pie.
plt.suptitle(
    'Percentage of candidates invested',
    x=0.55,
    y=1.15,
    color='red',
)
plt.show()

In [None]:
#Percent of investments
aman_percent = (aman_invested/aman_present)*100
anupam_percent = (anupam_invested/anupam_present)*100
ashneer_percent = (ashneer_invested/ashneer_present)*100
ghazal_percent = (ghazal_invested/ghazal_present)*100
namita_percent = (namita_invested/namita_present)*100
peyush_percent = (peyush_invested/peyush_present)*100
vineeta_percent = (vineeta_invested/vineeta_present)*100

In [None]:
# Bar chart of the investment percentages, one bar per person.
names = ['Aman', 'Anupam', 'Ashneer', 'Ghazal', 'Namita', 'Peyush', 'Vineeta']
percentage = [
    aman_percent,
    anupam_percent,
    ashneer_percent,
    ghazal_percent,
    namita_percent,
    peyush_percent,
    vineeta_percent,
]

fig = plt.figure(figsize=(10, 5))
plt.bar(names, percentage, width=0.4, color='blue')
# Label the axes created by plt.bar in a single call.
plt.gca().set(
    xlabel="Candidate Name",
    ylabel="Candidate Invested - Percentage",
    title="Candidate Invested - Percentage vs Name",
)
plt.show()

In [None]:
# Pie chart built from the per-person investment percentages.
# autopct prints each wedge's share of the *sum* of those percentages
# (pie() normalises its inputs), not the percentage itself, so the title
# states explicitly what the wedges represent.
plt.pie(percentage, labels = names, radius = 2.0, autopct = '%0.1f%%',
explode = [0.25,0.25,0.25,0.25,0.25,0.25,0.25])
# Title styled and positioned like the other pie charts in this notebook.
plt.suptitle('Relative share of investment rate (invested / present)', x = 0.55,
y = 1.15, color = 'red')
plt.show()

# Pandas DataFrame UltraQuick Tutorial

This Colab introduces [**DataFrames**](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html), which are the central data structure in the pandas API. This Colab is not a comprehensive DataFrames tutorial.  Rather, this Colab provides a very quick introduction to the parts of DataFrames required to do the other Colab exercises in Machine Learning Crash Course.

A DataFrame is similar to an in-memory spreadsheet. Like a spreadsheet:

  * A DataFrame stores data in cells. 
  * A DataFrame has named columns (usually) and numbered rows.

## Import NumPy and pandas modules

Run the following code cell to import the NumPy and pandas modules. 

In [None]:
import numpy as np
import pandas as pd

## Creating a DataFrame

The following code cell creates a simple DataFrame containing 10 cells organized as follows:

  * 5 rows
  * 2 columns, one named `temperature` and the other named `activity`

The following code cell instantiates a `pd.DataFrame` class to generate a DataFrame. The class takes two arguments:

  * The first argument provides the data to populate the 10 cells. The code cell calls `np.array` to generate the 5x2 NumPy array.
  * The second argument identifies the names of the two columns.

In [None]:
# Labels for the two columns of the frame.
my_column_names = ['temperature', 'activity']

# Five [temperature, activity] rows held in a 5x2 NumPy array.
my_data = np.array([
    [0, 3],
    [10, 7],
    [20, 9],
    [30, 14],
    [40, 15],
])

# Wrap the array in a DataFrame and show the whole thing.
my_dataframe = pd.DataFrame(my_data, columns=my_column_names)
print(my_dataframe)

In [None]:
# Create and populate a 5x3 NumPy array.
# Reg_No = Col1, Name = Col2, Marks = Col3
# dtype=object keeps each value's own Python type. Without it NumPy coerces
# the mixed int/str rows to one string dtype, so Eco_Marks would hold text
# and numeric operations on it (mean, median, "+ 2") would fail.
my_class_data = np.array(
    [[224501, 'A', 90], [224502, 'B', 90], [224503, 'C', 89], [224504, 'D', 82], [224505, 'E', 84]],
    dtype=object,
)
my_classCol_names = ['Reg_No', 'Name', 'Eco_Marks']

# Build the frame, then give the two numeric columns a real integer dtype
# (columns created from an object array start out as object).
my_class_dataframe = pd.DataFrame(data=my_class_data, columns=my_classCol_names).astype(
    {'Reg_No': 'int64', 'Eco_Marks': 'int64'}
)

print(my_class_dataframe)

# Other summaries to try (uncomment one at a time):
#my_class_dataframe['Eco_Marks'].mean()
#my_class_dataframe['Eco_Marks'].median()
my_class_dataframe['Eco_Marks'].mode()


#my_class_dataframe["Lab_Marks"] = my_class_dataframe["Eco_Marks"] + 2
#print(my_class_dataframe)




## Adding a new column to a DataFrame

You may add a new column to an existing pandas DataFrame just by assigning values to a new column name. For example, the following code creates a third column named `adjusted` in `my_dataframe`: 

In [None]:
# Derive `adjusted` as activity + 2 and attach it to the frame as a new column.
my_dataframe["adjusted"] = my_dataframe["activity"].add(2)

# Show the frame including the new column.
print(my_dataframe)

## Specifying a subset of a DataFrame

Pandas provides multiple ways to isolate specific rows, columns, slices, or cells in a DataFrame. 

In [None]:
# First three rows, selected by position.
print("Rows #0, #1, and #2:")
print(my_dataframe.iloc[:3], '\n')

# A one-row slice, so the result is still a DataFrame rather than a Series.
print("Row #2:")
print(my_dataframe.iloc[2:3], '\n')

# Positional slice; the end position (4) is excluded.
print("Rows #1, #2, and #3:")
print(my_dataframe.iloc[1:4], '\n')

# A single column, returned as a Series.
print("Column 'temperature':")
print(my_dataframe.loc[:, 'temperature'])

In [None]:
# Colab-only: prompt for a file upload and load the uploaded CSV into `df2`.
import io

import pandas as pd
from google.colab import files

# files.upload() returns a dict mapping each uploaded file name to its raw bytes.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; select a CSV file and run this cell again.")

# Prefer the expected file name, but fall back to the first uploaded file so
# the cell does not fail with KeyError when the CSV is named differently.
csv_name = 'IBBA_2145xx.csv' if 'IBBA_2145xx.csv' in uploaded else next(iter(uploaded))
df2 = pd.read_csv(io.BytesIO(uploaded[csv_name]))
print (df2)

## Task 1: Create a DataFrame

Do the following:

  1. Create a 3x4 (3 rows x 4 columns) pandas DataFrame in which the columns are named `Eleanor`,  `Chidi`, `Tahani`, and `Jason`.  Populate each of the 12 cells in the DataFrame with a random integer between 0 and 100, inclusive.

  2. Output the following:

     * the entire DataFrame
     * the value in the cell of row #1 of the `Eleanor` column

  3. Create a fifth column named `Janet`, which is populated with the row-by-row sums of `Tahani` and `Jason`.

To complete this task, it helps to know the NumPy basics covered in the NumPy UltraQuick Tutorial. 


In [None]:
# Write your code here.

In [None]:
#@title Double-click for a solution to Task 1.

# Names of the four columns.
my_column_names = ['Eleanor', 'Chidi', 'Tahani', 'Jason']

# 3x4 array of random integers; randint's upper bound is exclusive,
# so high=101 makes 100 a possible value.
my_data = np.random.randint(0, 101, size=(3, 4))

# Wrap the array in a DataFrame and show it.
df = pd.DataFrame(my_data, columns=my_column_names)
print(df)

# Cell at row label 1 of the Eleanor column.
print("\nSecond row of the Eleanor column: %d\n" % df.at[1, 'Eleanor'])

# Janet holds the row-by-row sum of Tahani and Jason.
df['Janet'] = df['Tahani'].add(df['Jason'])

# Show the frame again, now with the extra column.
print(df)

## Copying a DataFrame (optional)

Pandas provides two different ways to duplicate a DataFrame:

* **Referencing.** If you assign a DataFrame to a new variable, any change to the DataFrame or to the new variable will be reflected in the other. 
* **Copying.** If you call the `pd.DataFrame.copy` method, you create a true independent copy.  Changes to the original DataFrame or to the copy will not be reflected in the other. 

The difference is subtle, but important.

In [None]:
# Part 1 -- reference: binding `df` to a second name does not copy anything.
print("Experiment with a reference:")
reference_to_df = df

# Both names report the same cell value before the change.
print("  Starting value of df: %d" % df.at[1, 'Jason'])
print("  Starting value of reference_to_df: %d\n" % reference_to_df.at[1, 'Jason'])

# Change the cell through `df` only, then read it back through both names.
df.at[1, 'Jason'] += 5
print("  Updated df: %d" % df.at[1, 'Jason'])
print("  Updated reference_to_df: %d\n\n" % reference_to_df.at[1, 'Jason'])

# Part 2 -- true copy: .copy() returns a separate DataFrame.
print("Experiment with a true copy:")
copy_of_my_dataframe = my_dataframe.copy()

# Both frames report the same cell value before the change.
print("  Starting value of my_dataframe: %d" % my_dataframe.at[1, 'activity'])
print("  Starting value of copy_of_my_dataframe: %d\n" % copy_of_my_dataframe.at[1, 'activity'])

# Change the cell in `my_dataframe` only, then read it back from both frames.
my_dataframe.at[1, 'activity'] += 3
print("  Updated my_dataframe: %d" % my_dataframe.at[1, 'activity'])
print("  copy_of_my_dataframe does not get updated: %d" % copy_of_my_dataframe.at[1, 'activity'])