In [14]:
import pandas as pd

# Load the Pokemon stats table straight from a GitHub gist. The raw URL embeds a
# long hash after /raw/, which presumably pins one specific revision of the file.
df = pd.read_csv('https://gist.githubusercontent.com/armgilles/194bcff35001e7eb53a2a8b441e8b2c6/raw/92200bc0a673d5ce2110aaad4544ed6c4010f687/pokemon.csv')

In [15]:
#Display dataframe
df

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


## Discussion 

####  1. What is a dataframe?
 A dataframe is a two-dimensional tabular data structure used in data manipulation and analysis. It is a fundamental data structure in libraries like Pandas in Python, where it resembles a table with rows and columns. Each column represents a specific variable, while each row represents an individual data point or observation.
Dataframes offer a wide range of functionality, making them a powerful tool for data manipulation and analysis in Python. 
 
df = pd.DataFrame(data)


df = pd.DataFrame(data, index=index, columns=columns)

#### 2. What information can we obtain about a dataframe?

Information obtainable from a dataframe:

df.info() :- Prints a concise summary of the dataframe: column names, non-null counts, and the dtype of each column.  
df.shape :- Dimensions: the number of rows and columns in the dataframe.
df.dtypes :- Data type of each column in the dataframe.
df.columns :- Names of all columns present in the dataframe.
df.describe() :- Descriptive statistics (count, mean, standard deviation, minimum, 25th percentile, median, 75th percentile, and maximum) for each numeric column.
df.head() :- Returns only the first five rows of the dataframe.


In [None]:
#### 3. How have we interacted with a dataframe?

# Interacting with a dataframe involves tasks such as filtering rows, selecting
# specific columns, performing calculations, and aggregating data.
#
# Every example below runs against the `df` loaded from the Pokemon CSV at the
# top of the notebook. The cell never rebinds or mutates `df`, so it can be
# re-run safely and later cells still see the original columns.
#
# Note: inside a single cell only the LAST bare expression is displayed; the
# bare expressions further up are evaluated but show nothing unless printed.

# Display the entire dataframe (pandas truncates long frames when printing)
print(df)

#########################
# Filtering rows based on conditions using boolean indexing
# Filter rows where HP is greater than 40
filtered_rows = df[df['HP'] > 40]
print(filtered_rows)

###########################
# Selecting specific columns from the dataframe using their names.

# Select a single column 'Name' (the result is a Series)
df['Name']

# Select multiple columns (the result is a DataFrame)
selected_columns = df[['Name', 'Attack']]
print(selected_columns)

# We can see multiple columns in the dataframe by subsetting the dataframe
# with a list of strings.
df[['HP', 'Speed']]
# or
columns = ['HP', 'Speed']
df[columns]

# Select the adjacent columns 'Name', 'Type 1' and 'Type 2' using a label
# slice with .loc (label-based indexing; both end labels are included)
selected_columns_slice = df.loc[:, 'Name':'Type 2']

# Select columns using iloc at specific positions (0 and 2), integer-based indexing
df.iloc[:, [0, 2]]

# View the first 5 rows of the dataframe
print(df.head())

# View the last 5 rows of the dataframe
df.tail()

# View random rows from the dataframe.
# Change 3 to the number of random rows you want to see; random_state fixes
# the draw so a re-run shows the same rows.
df.sample(3, random_state=0)

###########################
# Perform calculations on columns to create new columns.
# Calculate a combined stat. assign() returns a NEW frame with the extra
# column, leaving `df` itself untouched.
df_with_total = df.assign(TotalStats=df['HP'] + df['Attack'] + df['Defense'])
print(df_with_total)

############################
# Aggregation functions are used to summarize data, usually by computing
# statistics like mean, sum, max, min, etc., on specific columns or rows.

# Aggregate a single column
average_hp = df['HP'].mean()
max_attack = df['Attack'].max()

# Aggregate every numeric column at once. The text columns (Name, Type 1,
# Type 2) are dropped first because statistics such as mean are not defined
# for strings and recent pandas versions raise a TypeError on them.
numeric_df = df.select_dtypes(include='number')
numeric_df.mean()
numeric_df.sum()
numeric_df.max()
numeric_df.min()
numeric_df.median()
numeric_df.std()  # ex: round(df['HP'].std(), 2)
df.count()  # Count the non-null values in each column. ex: df['HP'].count()


---

### Practice Exercises


#### Information about a dataframe

1. Obtain the following information:
    
    - dimensions
    - dtypes
    - column names
    - summary statistics

In [10]:
#dimensions
#shape: the number of rows and columns in the dataframe
df.shape

(800, 13)

In [11]:
#dtypes: the data type of each column
df.dtypes

#              int64
Name          object
Type 1        object
Type 2        object
Total          int64
HP             int64
Attack         int64
Defense        int64
Sp. Atk        int64
Sp. Def        int64
Speed          int64
Generation     int64
Legendary       bool
dtype: object

In [12]:
#columns: the list of column names
df.columns    

Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
       'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')

In [13]:
#summary statistics

#The .describe method gives a quick summary of the numerical values in a dataframe.
df.describe()


Unnamed: 0,#,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,435.1025,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,119.96304,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,180.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,330.0,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,450.0,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,515.0,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,780.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


In [51]:
df

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


#### Working with dataframes

1. What is the highest HP value present?

    - Create a function named highest_attack.
    - Use the loaded dataframe as an argument. 

In [42]:
df.HP.max()

255

In [96]:
def highest_attack(df, field='HP'):
    """Return the largest value stored in one column of ``df``.

    Parameters:
        df: dataframe to inspect.
        field: label of the column to scan; defaults to ``'HP'``.

    Returns:
        The maximum of ``df[field]``.
    """
    column = df[field]
    return column.max()

highest_hp_value = highest_attack(df)
print(highest_hp_value)

255


In [97]:
def highest_attack(df, field='HP'):
    """Find the maximum of the column named ``field`` (``'HP'`` unless overridden).

    ``df`` is the dataframe to search; the maximum of ``df[field]`` is returned.
    """
    peak_value = df[field].max()
    return peak_value

highest_hp_value = highest_attack(df,'Attack')
print(highest_hp_value)

190


In [56]:
# or
def highest_attack1(df):
    """Return the maximum of the ``'HP'`` column of ``df``."""
    hp_values = df['HP']
    top_hp = hp_values.max()
    return top_hp


highest_attack_value = highest_attack1(df)
print(highest_attack_value)

255


In [40]:
df.HP.max()

255

2. Which Pokemon possess(es) the highest HP value?

In [83]:
# Sort by HP from highest to lowest and keep the Name in the first row.
# head(1) yields exactly one row, so Pokemon tied for the top HP would not all be listed.
df.sort_values(by='HP', ascending=False)['Name'].head(1)


261    Blissey
Name: Name, dtype: object

In [84]:
#or
def pokemon_highest_hp(df):
    """Return the name(s) of the Pokemon holding the highest HP value.

    Every row whose HP equals the column maximum is returned, so ties are
    reported in full rather than cut down to a single arbitrary row.

    Parameters:
        df: dataframe with at least the columns ``'HP'`` and ``'Name'``.

    Returns:
        A Series of names (named ``'Name'``) that keeps the original row
        labels and row order. It is empty when ``df`` has no rows.
    """
    top_hp = df['HP'].max()
    # Boolean mask instead of a full sort: one pass over the column, and all
    # rows that share the maximum are kept.
    holds_top_hp = df['HP'] == top_hp
    return df.loc[holds_top_hp, 'Name']
highest_hp_value = pokemon_highest_hp(df)
highest_hp_value

261    Blissey
Name: Name, dtype: object

 
3. How many different types are represented in Type 1?

    - Create a function named num_types
    - Use the loaded dataframe as an argument
    

In [86]:
df['Type 1'].nunique()

18

In [94]:
def num_types(df, field='Type 1'):
    """Count how many distinct values appear in one column of ``df``.

    Parameters:
        df: dataframe to inspect.
        field: label of the column to count in; defaults to ``'Type 1'``.

    Returns:
        The result of ``nunique()`` on that column (with pandas' defaults,
        missing values are not counted as a distinct value).
    """
    return df[field].nunique()

diff_num_types_values = num_types(df)
print(diff_num_types_values) 

18


4. Number of Pokemon whose Type 2 is Ghost

In [110]:
# The comparison yields a boolean Series; summing it counts the rows that are True.
(df['Type 2'] == 'Ghost').sum()

14

In [112]:
len(df[df['Type 2'] == 'Ghost'])

14

In [36]:
def num_ghost_type2(dataframe):
    """Return how many rows of ``dataframe`` have ``'Type 2'`` equal to ``'Ghost'``."""
    is_ghost = dataframe['Type 2'].eq('Ghost')
    # Each True in the mask is one matching row; int() gives a plain Python int.
    return int(is_ghost.sum())

diff_type_intype2 = num_ghost_type2(df)
print(diff_type_intype2)


14


5. Percentage of Pokemon whose Type 2 is Ghost

    - Create a function named percent_ghost
     - Use the loaded dataframe as an argument

In [113]:
len(df)

800

In [114]:
df.shape[0]# no:of rows in df

800

In [None]:
(df['Type 2'] == 'Ghost').sum() #to get sum of all type2 that are ghost

In [118]:
round(((df['Type 2'] == 'Ghost').sum()/df.shape[0])* 100 ,2)#Percentage of Pokemon whose Type 2 is Ghost

1.75

In [122]:
#Creating function
def percent_ghost(df):
    """Return the percentage of rows whose ``'Type 2'`` is ``'Ghost'``.

    The share is computed against the total number of rows in ``df`` and
    rounded to two decimal places.
    """
    ghost_count = (df['Type 2'] == 'Ghost').sum()
    total_rows = df.shape[0]
    ghost_share = ghost_count / total_rows
    return round(ghost_share * 100, 2)

In [121]:
#or
def percent_ghost(df):
    """Percentage (rounded to 2 decimals) of rows in ``df`` with ``'Type 2'`` == ``'Ghost'``."""
    is_ghost = df['Type 2'].eq('Ghost')
    # Matching rows divided by all rows, scaled to a percentage.
    return round((is_ghost.sum() / len(df)) * 100, 2)
percent_ghost_round_value = percent_ghost(df)
percent_ghost_round_value 

1.75

 6. Number of Pokemon whose Attack is greater than Defense


In [124]:
# Row-by-row comparison of the two columns; summing the boolean result counts the rows where Attack is larger.
(df['Attack']> df['Defense']).sum()

433

In [38]:
    def num_attack_greater_than_defense(dataframe):
        """Return how many rows of ``dataframe`` have ``'Attack'`` strictly above ``'Defense'``."""
        attack_is_higher = dataframe['Attack'].gt(dataframe['Defense'])
        # Each True in the mask is one qualifying row; int() gives a plain Python int.
        return int(attack_is_higher.sum())
    
diff_type_intype4 = num_attack_greater_than_defense(df)
print(diff_type_intype4)

433


7. Lowest speed for Grass or Rock

In [128]:
# Keep the rows whose Type 1 is Grass or Rock (Type 2 is not consulted), then take the smallest Speed.
df[(df['Type 1'] == 'Grass') | (df['Type 1'] == 'Rock')]['Speed'].min()

10

In [39]:
def lowest_speed_grass_or_rock(dataframe):
    """Return the smallest ``'Speed'`` among rows whose ``'Type 1'`` is Grass or Rock.

    Only the ``'Type 1'`` column is used for the filter.
    """
    is_grass_or_rock = dataframe['Type 1'].isin(['Grass', 'Rock'])
    return dataframe.loc[is_grass_or_rock, 'Speed'].min()

diff_type_intype5 = lowest_speed_grass_or_rock(df)
print(diff_type_intype5)

10
