## Importing the library

In [1]:
import pandas as pd 

### Creating a Dataframe from dictionary

In [2]:
# Names
Names = ["Ahmed" , "Mostafa" , "Mariam"]
Ages =  [18 , 22 , 20]
Gender = ["Male" , "Male" , "Female"]

In [3]:
# Create a dictionary
data = {
    "names":Names,
    "ages": Ages,
    "gender": Gender
}

In [4]:
# Create a dataframe
df = pd.DataFrame(data)
print(df)

     names  ages  gender
0    Ahmed    18    Male
1  Mostafa    22    Male
2   Mariam    20  Female


In [5]:
df["names"]

0      Ahmed
1    Mostafa
2     Mariam
Name: names, dtype: object

### Creating a DataFrame from nDimension list

In [6]:
data = [
    # ["Names" , "Ages" , "City"],
    ["Ahmed" , 18 , "Alexandria"],
    ["Mostafa" , 22 , "Cairo"],
    ["Mariam" , 20 , "Alexandria"] 
]

In [8]:
df = pd.DataFrame(data)
print(df)
df.columns = ["Names" , "Age" , "City"]
print()
print(df)

         0   1           2
0    Ahmed  18  Alexandria
1  Mostafa  22       Cairo
2   Mariam  20  Alexandria

     Names  Age        City
0    Ahmed   18  Alexandria
1  Mostafa   22       Cairo
2   Mariam   20  Alexandria


In [None]:
# Add coulmns in the initialization
df = pd.DataFrame(data, columns = ["Names" , "Age" , "City"])
print(df)

     Names  Age        City
0    Ahmed   18  Alexandria
1  Mostafa   22       Cairo
2   Mariam   20  Alexandria


In [None]:
# Extract Columns
print(df["Names"])
print()
print(df["Age"])
print()
print(df["City"])

0      Ahmed
1    Mostafa
2     Mariam
Name: Names, dtype: object

0    18
1    22
2    20
Name: Age, dtype: int64

0    Alexandria
1         Cairo
2    Alexandria
Name: City, dtype: object


In [None]:
# Extract Full Row
print(df.loc[0])

Names         Ahmed
Age              18
City     Alexandria
Name: 0, dtype: object


In [None]:
# Extract Specified Data
print(df.loc[2]["Age"])

20


### DataFrame Operations

In [None]:
# Mean , Sum , Max , Min 
max = df["Age"].max()
print(max)

min = df["Age"].min()
print(min)

mean = df["Age"].mean()
print(mean)

sum = df["Age"].sum()
print(sum)

22
18
20.0
60


In [None]:
df["City"]

0    Alexandria
1         Cairo
2    Alexandria
Name: City, dtype: object

In [None]:
# Unique Values 
unique = df["City"].unique()
print(unique)

['Alexandria' 'Cairo']


In [None]:
# Counts values
unique = df["City"].value_counts()
print(unique)

Alexandria    2
Cairo         1
Name: City, dtype: int64


In [None]:
df

Unnamed: 0,Names,Age,City
0,Ahmed,18,Alexandria
1,Mostafa,22,Cairo
2,Mariam,20,Alexandria


In [None]:
# Remove Empty Cells
data = [
    ["Ahmed" , 18 ,"Cairo"],
    ["Mostafa" , 22 , None],
    ["Mariam" , 20 , "Alexandria"] 
]
df = pd.DataFrame(data , columns = ["Names", "Age" , "City" ])
# new_df = df.dropna()
print(df)

     Names  Age        City
0    Ahmed   18       Cairo
1  Mostafa   22        None
2   Mariam   20  Alexandria


In [None]:
# Fill Empty Cells
new_df = df.fillna("Unkown")
print(new_df)

     Names  Age        City
0    Ahmed   18       Cairo
1  Mostafa   22      Unkown
2   Mariam   20  Alexandria


### Addition and Removal of Rows / Columns

In [None]:
# Add row by using loc function
df.loc[4] = ["Mohamed",26,"Tanta"]
print(df)

     Names  Age        City
0    Ahmed   18       Cairo
1  Mostafa   22        None
2   Mariam   20  Alexandria
4  Mohamed   26       Tanta


In [None]:
# Add row by using Append function
sample = pd.DataFrame([["Sara" , 24 , "Tanta"]] , columns = ["Names" , "Age" , "City" , index= [5])
# print(sample)
df = df.append(sample)
print(df)

     Names   Age        City     0     1      2
0    Ahmed  18.0       Cairo   NaN   NaN    NaN
1  Mostafa  22.0        None   NaN   NaN    NaN
2   Mariam  20.0  Alexandria   NaN   NaN    NaN
4  Mohamed  26.0       Tanta   NaN   NaN    NaN
5      NaN   NaN         NaN  Sara  24.0  Tanta


  df = df.append(sample)


In [183]:
df = df.dropna()
print(df)

     Names  Age        City
0    Ahmed   18       Cairo
2   Mariam   20  Alexandria
4  Mohamed   26       Tanta
5     Sara   24       Tanta


In [184]:
# Reset Index
# new_df = df.reset_index()
# print(new_df)
df = df.reset_index(drop=True)
print(df)

     Names  Age        City
0    Ahmed   18       Cairo
1   Mariam   20  Alexandria
2  Mohamed   26       Tanta
3     Sara   24       Tanta


In [185]:
# Delete Row by index
df = df.drop(0)
print(df)

     Names  Age        City
1   Mariam   20  Alexandria
2  Mohamed   26       Tanta
3     Sara   24       Tanta


In [186]:
df = df.reset_index(drop=True)
print(df)

     Names  Age        City
0   Mariam   20  Alexandria
1  Mohamed   26       Tanta
2     Sara   24       Tanta


In [187]:
# Add column
df["Color"] = ["Red", "Green", "Blue"]
print(df)

     Names  Age        City  Color
0   Mariam   20  Alexandria    Red
1  Mohamed   26       Tanta  Green
2     Sara   24       Tanta   Blue


In [188]:
# Delete Column
df = df.drop(columns=["Color"])
print(df)

     Names  Age        City
0   Mariam   20  Alexandria
1  Mohamed   26       Tanta
2     Sara   24       Tanta


### Load / Save Dataframe

In [197]:
# To save Dataframe
df.to_csv("dataframe.csv",index = False)

In [198]:
# Read csv
df = pd.read_csv("dataframe.csv")
print(df)

     Names  Age        City
0   Mariam   20  Alexandria
1  Mohamed   26       Tanta
2     Sara   24       Tanta


In [196]:
df = df.drop(columns=["Unnamed: 0"])
df

Unnamed: 0,Names,Age,City
0,Mariam,20,Alexandria
1,Mohamed,26,Tanta
2,Sara,24,Tanta


### Practice Problems

You have a dictionary of sales data for different products in different countries. The dictionary keys are 'Product', 'Country', 'Units Sold', and 'Sales Amount'.
- Create a pandas data frame from this dictionary and display the maximum sales.
- Remove any rows with empty cells
- Get number of different Product Category 

In [7]:
import pandas as pd

dict_values = {
"product": ['Product A', 'Product B', 'Product C', 'Product A', 'Product B'],
"country": ['USA', 'France', 'England', 'Ireland', 'Germany'],
"units_sold": [100, None, 150, 75, 300],
"sales_amount": [1000, 2500, 2000, 1000, 4000]
}

In [199]:
df = pd.DataFrame(dict_values)
print(df)

     product  country  units_sold  sales_amount
0  Product A      USA       100.0          1000
1  Product B   France         NaN          2500
2  Product C  England       150.0          2000
3  Product A  Ireland        75.0          1000
4  Product B  Germany       300.0          4000


In [201]:
df["sales_amount"].max()

4000

In [202]:
df = df.dropna()
print(df)

     product  country  units_sold  sales_amount
0  Product A      USA       100.0          1000
2  Product C  England       150.0          2000
3  Product A  Ireland        75.0          1000
4  Product B  Germany       300.0          4000


In [206]:
category_numbers = df["product"].value_counts()
print(len(category_numbers))

3


You have a 2D NumPy array that contains test scores for students in different subjects. The first row of the array contains the subject names and the first column contains the student names.
- Create a pandas data frame from this array and replace any empty cells with the mean score for that subject. 
- Display the maximum, minimumn, and the mean score for each subject.


In [244]:
import numpy as np

topics = np.array([
    ["Names" , "Topic A" , "Topic B", "Topic C" , "Topic D"]
]) 

nn = [
    ["Student 1"] ,["Student 2"] , ["Student 3"] , ["Student 4"] , ['Student 5'] , ["Student 6"] , ["Student 7"] , ["Student 8"] , ["Student 9"] , ["Student 10"]]
nn = np.array(nn)
# print(nn)
values = np.random.randint(1,100,size=(10,4))

values_nn = np.concatenate((nn , values) , axis = -1)
student_grades = np.concatenate((topics,values_nn) , axis = 0)



student_grades[1 , 4] = np.nan
student_grades[1 , 1] = np.nan
student_grades[4 , 3] = np.nan
student_grades[1 , 2] = np.nan
student_grades[8 , 2] = np.nan
student_grades[7 , 1] = np.nan

In [245]:
print(student_grades)
# print(student_grades)


[['Names' 'Topic A' 'Topic B' 'Topic C' 'Topic D']
 ['Student 1' 'nan' 'nan' '45' 'nan']
 ['Student 2' '78' '11' '20' '40']
 ['Student 3' '92' '97' '20' '98']
 ['Student 4' '79' '71' 'nan' '87']
 ['Student 5' '49' '87' '91' '30']
 ['Student 6' '56' '49' '63' '92']
 ['Student 7' 'nan' '35' '19' '94']
 ['Student 8' '74' 'nan' '84' '1']
 ['Student 9' '71' '28' '85' '9']
 ['Student 10' '38' '4' '16' '38']]


In [246]:
student_grades[1:]

array([['Student 1', 'nan', 'nan', '45', 'nan'],
       ['Student 2', '78', '11', '20', '40'],
       ['Student 3', '92', '97', '20', '98'],
       ['Student 4', '79', '71', 'nan', '87'],
       ['Student 5', '49', '87', '91', '30'],
       ['Student 6', '56', '49', '63', '92'],
       ['Student 7', 'nan', '35', '19', '94'],
       ['Student 8', '74', 'nan', '84', '1'],
       ['Student 9', '71', '28', '85', '9'],
       ['Student 10', '38', '4', '16', '38']], dtype='<U21')

In [247]:
df = pd.DataFrame(student_grades[1:] , columns=student_grades[0])

In [248]:
df

Unnamed: 0,Names,Topic A,Topic B,Topic C,Topic D
0,Student 1,,,45.0,
1,Student 2,78.0,11.0,20.0,40.0
2,Student 3,92.0,97.0,20.0,98.0
3,Student 4,79.0,71.0,,87.0
4,Student 5,49.0,87.0,91.0,30.0
5,Student 6,56.0,49.0,63.0,92.0
6,Student 7,,35.0,19.0,94.0
7,Student 8,74.0,,84.0,1.0
8,Student 9,71.0,28.0,85.0,9.0
9,Student 10,38.0,4.0,16.0,38.0


In [261]:
df["Topic B"] = df["Topic B"].astype(np.float32)

temp = df["Topic B"].fillna(0)
print(temp)
mean_B = temp.mean()


In [263]:
df["Topic B"] = df["Topic B"].fillna(mean_B) 

In [108]:
df = df.replace(to_replace="None", value=np.nan)

zeros_df = df.fillna(0)
for i in df.columns:
  if i == "Names":
    continue

  zeros_df[i] = zeros_df[i].astype('int32')
  mean = zeros_df[i].mean()
  df[i] = df[i].fillna(mean)

In [110]:
for i in df.columns:
  if i == "Names":
    continue
  print(i)

  df[i] = df[i].astype('int32')
  print(df[i].mean())
  print(df[i].min())
  print(df[i].max())
  

Topic A
50.5
21
82
Topic B
44.9
12
92
Topic C
66.3
11
99
Topic D
45.7
4
81
