## Pandas 

### Three Fundamental Pandas Data Structures

#### Series as a generalized NumPy array

In [1]:
import pandas as pd

# Build a Series from a plain list; no index is given, so pandas supplies
# the default integer labels 0..3.
quarter_steps = [0.25, 0.5, 0.75, 1.0]
data = pd.Series(quarter_steps)
print(data)

# A Series pairs an array of values (.values) with an index (.index);
# with the default index, data[1] looks up the label 1.
(data.values, data.index, data[1])

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


(array([0.25, 0.5 , 0.75, 1.  ]),
 RangeIndex(start=0, stop=4, step=1),
 np.float64(0.5))

In [15]:
# Index labels do not have to be integers: here they are strings ...
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=['apple', 'banana', 'cabbage', 'donuts'],
)
# ... and integer labels do not have to start at zero.
data2 = pd.Series([0, 2, 3, 4], index=[2, 3, 4, 5])
(data, data2)

(apple      0.25
 banana     0.50
 cabbage    0.75
 donuts     1.00
 dtype: float64,
 2    0
 3    2
 4    3
 5    4
 dtype: int64)

#### Series as a specialized dictionary 

In [2]:
import pandas as pd
# Populations keyed by state name.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}

# The dictionary keys become the index labels, in insertion order.
population = pd.Series(data=population_dict)
population
# Unlike a plain dict, the Series also accepts label slices, e.g.:
# population['California':'Florida'].values

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

### The Pandas DataFrame Object 

#### DataFrame as a generalized NumPy Array

In [40]:
import pandas as pd

# Areas keyed by state name.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
area = pd.Series(area_dict)

# `population` is not defined in this cell; it is expected to be the Series
# created by an earlier cell. The two Series are aligned on their index labels.
states = pd.DataFrame({'Population': population, 'Area': area})

# Element-wise division of two columns yields a new derived column.
states['Population Density'] = states['Population'].div(states['Area'])
print(states)

            Population    Area  Population Density
California    38332521  423967           90.413926
Texas         26448193  695662           38.018740
New York      19651127  141297          139.076746
Florida       19552860  170312          114.806121
Illinois      12882135  149995           85.883763


In [13]:
import pandas as pd

# Populations keyed by state name.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}

# Areas keyed by state name. (A fragment of pasted cell output had been
# spliced into this literal, which made the whole cell a SyntaxError.)
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}

population = pd.Series(population_dict)
area = pd.Series(area_dict)

# A DataFrame can be built from a dict of Series: each key becomes a column
# name and the Series are aligned on their shared index.
states_dict = {'population': population, 'area': area}
states = pd.DataFrame(states_dict)

print(states)
# Dictionary-style access by column name returns that column as a Series.
states['area']


            population    area
California    38332521  423967
Texas         26448193  695662
New York      19651127  141297
Florida       19552860  170312
Illinois      12882135  149995


California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

#### DataFrame as a specialized Dictionary

In [30]:
import numpy as np
import pandas as pd
# Structured dtype: each record has an 8-byte integer field 'A' and an
# 8-byte float field 'B'.
dtype = [('A', 'i8'), ('B', 'f8')]
A = np.zeros(3, dtype=dtype)

# The field names of the structured array become the DataFrame's columns.
Adata = pd.DataFrame(A)
print(A, Adata, sep='\n')

[(0, 0.) (0, 0.) (0, 0.)]
   A    B
0  0  0.0
1  0  0.0
2  0  0.0


In [32]:
# An Index supports array-style slicing; the result is again an Index.
ind = pd.Index([2, 3, 5, 7, 11])
every_other = slice(None, None, 2)  # equivalent to the [::2] shorthand
ind[every_other]

Index([2, 5, 11], dtype='int64')

#### Practice

In [71]:
# Exercise 

import pandas as pd
import numpy as np

# Load the CSV and view the whole table as a 2-D NumPy array.
file = pd.read_csv('president.csv')
data = np.array(file)

# Split the array into its three columns.
# NOTE(review): presumably these are order number, name and height -- confirm
# against president.csv. np.hsplit(data, 3)[0] would yield the same first
# column, but as a 2-D slice.
order, names, statures = data[:, 0], data[:, 1], data[:, 2]

# Re-assemble the columns with pandas, labelling every row by the first column.
presidents = pd.Series(names, index=order)
heights = pd.Series(statures, index=order)

# Alternative construction from the two Series:
#   info = pd.DataFrame({'Presidents': presidents, 'heights': heights})
info = pd.DataFrame(
    data[:, 1:],
    index=order,
    columns=['Presidents', 'Heights'],
)
info

Unnamed: 0,Presidents,Heights
1,George Washington,189
2,John Adams,170
3,Thomas Jefferson,189
4,James Madison,180
5,James Monroe,175
6,John Quincy Adams,160
7,Andrew Jackson,185
8,Martin Van Buren,170
9,William Henry Harrison,165
10,John Tyler,168


### Data Selection and Indexing 

In [6]:
import pandas as pd
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=list('abcd'))

# Assigning to a label that is not yet in the index appends a new entry,
# much like adding a key to a dictionary.
data.loc['e'] = 0.7
data
# Other dictionary-style accessors to try: data.keys(), list(data.items())

a    0.25
b    0.50
c    0.75
d    1.00
e    0.70
dtype: float64

In [8]:
# Boolean masking: keep only the entries strictly between 0.3 and 0.8.
# `data` is the Series left behind by the previous cell.
in_range = (data > 0.3) & (data < 0.8)
data[in_range]

b    0.50
c    0.75
e    0.70
dtype: float64

### Indexers (loc and iloc; the older ix indexer was removed in pandas 1.0)

In [19]:
data = pd.Series(['a', 'b', 'c'], index=[1, 3, 5])

# .loc slices by index *label* and includes the end label, so [:3] stops at
# label 3; .iloc slices by *position* with the end excluded, so [:3] takes
# the first three rows.
by_label = data.loc[:3]
by_position = data.iloc[:3]
print(by_label, by_position, sep='\n\n')

1    a
3    b
dtype: object

1    a
3    b
5    c
dtype: object


In [43]:
import random
import numpy as np
import pandas as pd
# Seed NumPy's global generator so the random table is the same on every run.
np.random.seed(1)

# Draw 12 integers from [0, 12), arrange them as 3 rows x 4 columns and wrap
# the result in a DataFrame with named columns.
my_array = pd.DataFrame(
    np.random.randint(12, size=12).reshape(3, 4),
    columns=list('ABCD'),
)
my_array

Unnamed: 0,A,B,C,D
1,5,11,8,9
2,11,5,0,0
3,1,7,6,9


#### Exercise 

In [107]:
import pandas as pd
import numpy as np

file = pd.read_json('food_data.json')
data = np.array(file['meals'])

# Pull the four fields of interest out of every meal record. dtype=object
# keeps each value as an ordinary Python object (str or None) whether or not
# any field is missing, and the reshape keeps the table two-dimensional even
# when there are no meals at all.
meal_fields = ['strMeal', 'strArea', 'strTags', 'strInstructions']
my_array = np.array(
    [[row[field] for field in meal_fields] for row in data],
    dtype=object,
).reshape(-1, len(meal_fields))

# 1-based row labels for the final table.
index_array = np.arange(1, len(my_array) + 1)


def clean_instructions(text):
    """Return ``text`` as a string with line-break and tab artefacts removed.

    Removes, in this order: real CR/LF pairs, literal backslash-escaped
    ``\\r\\n`` sequences, ``0.<tab>`` markers, and any remaining tabs.
    """
    cleaned = str(text)
    for junk in ('\r\n', '\\r\\n', '0.\t', '\t'):
        cleaned = cleaned.replace(junk, '')
    return cleaned


# Clean the whole instructions column in one assignment instead of updating
# it element by element through chained indexing.
my_array[:, 3] = [clean_instructions(text) for text in my_array[:, 3]]

data_dict = {
    'Meal Name': my_array[:, 0],
    'Origin': my_array[:, 1],
    # NOTE(review): this key ends with a space; it is kept unchanged so any
    # existing lookups of my_dataframe['Tags '] keep working -- confirm
    # whether the trailing space is intended.
    'Tags ': my_array[:, 2],
    'Instructions': my_array[:, 3],
}
# Tip: pd.set_option('display.max_colwidth', None) shows the full instructions.
my_dataframe = pd.DataFrame(data_dict, index=index_array)

# Only a missing tag should read 'No Tag'; filling every null cell in the
# frame would also relabel a missing meal name or origin as 'No Tag'.
my_dataframe['Tags '] = my_dataframe['Tags '].fillna('No Tag')
my_dataframe

Unnamed: 0,Meal Name,Origin,Tags,Instructions
1,Corba,Turkish,Soup,"Pick through your lentils for any foreign debris, rinse them 2 or 3 times, drain, and set aside. Fair warning, this will probably turn your lentils into a solid block that you’ll have to break up laterIn a large pot over medium-high heat, sauté the olive oil and the onion with a pinch of salt for about 3 minutes, then add the carrots and cook for another 3 minutes.Add the tomato paste and stir it around for around 1 minute. Now add the cumin, paprika, mint, thyme, black pepper, and red pepper as quickly as you can and stir for 10 seconds to bloom the spices. Congratulate yourself on how amazing your house now smells.Immediately add the lentils, water, broth, and salt. Bring the soup to a (gentle) boil.After it has come to a boil, reduce heat to medium-low, cover the pot halfway, and cook for 15-20 minutes or until the lentils have fallen apart and the carrots are completely cooked.After the soup has cooked and the lentils are tender, blend the soup either in a blender or simply use a hand blender to reach the consistency you desire. Taste for seasoning and add more salt if necessary.Serve with crushed-up crackers, torn up bread, or something else to add some extra thickness. You could also use a traditional thickener (like cornstarch or flour), but I prefer to add crackers for some texture and saltiness. Makes great leftovers, stays good in the fridge for about a week."
2,Tamiya,Egyptian,No Tag,"oak the beans in water to cover overnight.Drain. If skinless beans are unavailable, rub to loosen the skins, then discard the skins. Pat the beans dry with a towel.Grind the beans in a food mill or meat grinder.If neither appliance is available, process them in a food processor but only until the beans form a paste. (If blended too smoothly, the batter tends to fall apart during cooking.) Add the scallions, garlic, cilantro, cumin, baking powder, cayenne, salt, pepper, and coriander, if using. Refrigerate for at least 30 minutes.Shape the bean mixture into 1-inch balls.Flatten slightly and coat with flour.Heat at least 1½-inches of oil over medium heat to 365 degrees.Fry the patties in batches, turning once, until golden brown on all sides, about 5 minutes.Remove with a wire mesh skimmer or slotted spoon. Serve as part of a meze or in pita bread with tomato-cucumber salad and tahina sauce."
3,Lasagne,Italian,No Tag,"Heat the oil in a large saucepan. Use kitchen scissors to snip the bacon into small pieces, or use a sharp knife to chop it on a chopping board. Add the bacon to the pan and cook for just a few mins until starting to turn golden. Add the onion, celery and carrot, and cook over a medium heat for 5 mins, stirring occasionally, until softened.Add the garlic and cook for 1 min, then tip in the mince and cook, stirring and breaking it up with a wooden spoon, for about 6 mins until browned all over.Stir in the tomato purée and cook for 1 min, mixing in well with the beef and vegetables. Tip in the chopped tomatoes. Fill each can half full with water to rinse out any tomatoes left in the can, and add to the pan. Add the honey and season to taste. Simmer for 20 mins.Heat oven to 200C/180C fan/gas 6. To assemble the lasagne, ladle a little of the ragu sauce into the bottom of the roasting tin or casserole dish, spreading the sauce all over the base. Place 2 sheets of lasagne on top of the sauce overlapping to make it fit, then repeat with more sauce and another layer of pasta. Repeat with a further 2 layers of sauce and pasta, finishing with a layer of pasta.Put the crème fraîche in a bowl and mix with 2 tbsp water to loosen it and make a smooth pourable sauce. Pour this over the top of the pasta, then top with the mozzarella. Sprinkle Parmesan over the top and bake for 25–30 mins until golden and bubbling. Serve scattered with basil, if you like."
4,Kafteji,Tunisian,No Tag,"Peel potatoes and cut into 5cm cubes.Pour 1-2 cm of olive oil into a large pan and heat up very hot. Fry potatoes until golden brown for 20 minutes, turning from time to time. Place on kitchen paper to drain.Cut the peppers in half and remove seeds. Rub a little olive oil on them and place the cut side down on a baking tray. Place them under the grill. Grill until the skin is dark and bubbly. While the peppers are still hot, put them into a plastic sandwich bag and seal it. Take them out after 15 minutes and remove skins.In the meantime, heat more olive oil another pan. Peel the onions and cut into thin rings. Fry for 15 minutes until golden brown, turning them often. Add the Ras el hanout at the end.Cut the pumpkin into 5cm cubes and fry in the same pan you used for the potatoes for 10-15 minutes until it is soft and slightly browned. Place on kitchen paper.Pour the remaining olive oil out of the pan and put all the cooked vegetables into the pan and mix. Whisk eggs and pour them over the vegetables. Put the lid on the pan so that the eggs cook. Put the contents of the pan onto a large chopping board, add salt and pepper and chopped and mix everything with a big knife."
5,Dal fry,Indian,"Curry,Vegetarian,Cake","Wash and soak toor dal in approx. 3 cups of water, for at least one hours. Dal will be double in volume after soaking. Drain the water.Cook dal with 2-1/2 cups water and add salt, turmeric, on medium high heat, until soft in texture (approximately 30 mins) it should be like thick soup.In a frying pan, heat the ghee. Add cumin seeds, and mustard seeds. After the seeds crack, add bay leaves, green chili, ginger and chili powder. Stir for a few seconds.Add tomatoes, salt and sugar stir and cook until tomatoes are tender and mushy.Add cilantro and garam masala cook for about one minute.Pour the seasoning over dal mix it well and cook for another minute.Serve with Naan."
6,Big Mac,American,No Tag,"For the Big Mac sauce, combine all the ingredients in a bowl, season with salt and chill until ready to use.2. To make the patties, season the mince with salt and pepper and form into 4 balls using about 1/3 cup mince each. Place each onto a square of baking paper and flatten to form into four x 15cm circles. Heat oil in a large frypan over high heat. In 2 batches, cook beef patties for 1-2 minutes each side until lightly charred and cooked through. Remove from heat and keep warm. Repeat with remaining two patties.3. Carefully slice each burger bun into three acrossways, then lightly toast.4. To assemble the burgers, spread a little Big Mac sauce over the bottom base. Top with some chopped onion, shredded lettuce, slice of cheese, beef patty and some pickle slices. Top with the middle bun layer, and spread with more Big Mac sauce, onion, lettuce, pickles, beef patty and then finish with more sauce. Top with burger lid to serve.5. After waiting half an hour for your food to settle, go for a jog."
7,Koshari,Egyptian,No Tag,"Cook the lentils. Bring lentils and 4 cups of water to a boil in a medium pot or saucepan over high heat. Reduce the heat to low and cook until lentils are just tender (15-17 minutes). Drain from water and season with a little salt. (Note: when the lentils are ready, they should not be fully cooked. They should be only par-cooked and still have a bite to them as they need to finish cooking with the rice).Now, for the rice. Drain the rice from its soaking water. Combine the par-cooked lentils and the rice in the saucepan over medium-high heat with 1 tbsp cooking oil, salt, pepper, and coriander. Cook for 3 minutes, stirring regularly. Add warm water to cover the rice and lentil mixture by about 1 1/2 inches (you’ll probably use about 3 cups of water here). Bring to a boil; the water should reduce a bit. Now cover and cook until all the liquid has been absorbed and both the rice and lentils are well cooked through (about 20 minutes). Keep covered and undisturbed for 5 minutes or so.Now make the pasta. While the rice and lentils are cooking, make the pasta according to package instructions by adding the elbow pasta to boiling water with a dash of salt and a little oil. Cook until the pasta is al dente. Drain.Cover the chickpeas and warm in the microwave briefly before serving.Make the crispy onion topping. Sprinkle the onion rings with salt, then toss them in the flour to coat. Shake off excess flour.In a large skillet, heat the cooking oil over medium-high heat, cook the onion rings, stirring often, until they turn a nice caramelized brown. Onions must be crispy, but not burned (15-20 minutes)."
8,Kapsalon,Dutch,Snack,"Cut the meat into strips. Heat oil in a pan and fry the strips for 6 minutes until it's ready.Bake the fries until golden brown in a deep fryrer. When ready transfer to a backing dish. Make sure the fries are spread over the whole dish.Cover the fries with a new layer of meat and spread evenly.Add a layer of cheese over the meat. You can also use grated cheese. When done put in the oven for a few minutes until the cheese is melted.Chop the lettuce, tomato and cucumber in small pieces and mix together. for a basic salad. As extra you can add olives jalapenos and a red union.Dived the salad over the dish and Serve with garlicsauce and hot sauce"
9,Stamppot,Dutch,"Savory,Breakfast","Wash and peel the potatoes and cut into similarly sized pieces for even cooking.In a large soup pot, boil the potatoes and the bay leaves in salted water for 20 minutes. Discard the bay leaves.If you're not using a bag of ready-cut curly kale, wash the bunches thoroughly under cool running water to get rid of all soil—you wouldn't want that gritty texture in your finished dish. Trim any coarse stems and discard any brown leaves. With a sharp knife, cut the curly kale into thin strips.Peel and chop the shallots.In a frying pan or skillet, melt 1 tbsp. of butter and saute the shallots for a few minutes before adding the curly kale and 2 tbsp. of water. Season and cook for about 10 minutes, or until tender.Warm the milk on the stove or in the microwave.Drain, shake and dry the potatoes with kitchen towels before mashing with a potato masher or ricer. Working quickly, add the warm milk and the remaining butter. Season to taste with nutmeg, salt, and pepper. Mix the cooked curly kale through the cooked mashed potato mixture.Top with slices of the smoked sausage and serve hot with your favorite mustard or gravy.Serve and enjoy!"
10,Flamiche,French,Tart,"For the pastry, sift the flour and salt into the bowl of a food processor, add the butter and lard, then whizz together briefly until the mixture looks like fine breadcrumbs. Tip the mixture into a bowl, then stir in the cheese and enough of the water for the mixture to come together. Tip out onto a lightly floured surface and knead briefly until smooth. Roll out thinly and line a 23cm x 4cm loose-?bottomed fluted flan tin. Prick the base with a fork. Chill for 20 minutes.02.Melt the 75g butter in a saucepan over a low heat, then add the leeks and the salt. Cover and cook for ?10 minutes until soft. Uncover the pan, increase the heat and cook ?for 2 minutes, stirring occasionally, until the liquid has evaporated. Spoon onto a plate and leave to cool.03.Preheat the oven to 200°C/fan180°C/gas 6. Line the pastry case with baking paper and baking beans or rice and blind bake for 15-20 minutes until the edges are biscuit-coloured. Remove the paper and beans/rice and return the case to the oven for 7-10 minutes until the base is crisp and lightly golden. Remove and set aside. Reduce the oven temperature to 190°C/fan170°C/gas 5.04.Put the crème fraîche into a bowl with the whole egg, egg yolks and nutmeg. Lightly beat together, then season. Stir in the leeks. Spoon ?the mixture into the tart case and bake for 35-40 minutes until set ?and lightly golden. Remove from ?the oven and leave for 10 minutes. Take out of the tin and serve."


#### UFuncs: Index Alignment

In [30]:
import pandas as pd
import random as rng
import numpy as np
# Index-alignment example with two Series (kept for reference):
# A = pd.Series([2, 4, 6], index=[0, 1, 2])
# B = pd.Series([1, 3, 5], index=[1, 2, 3])
# A.add(B, fill_value=0)

# Twenty consecutive integers (0..19) laid out as 2 rows x 10 columns.
A = pd.DataFrame(np.arange(20).reshape(2, 10))
print(A)
# NOTE(review): the example below cannot run as written, because
# np.arange(0, 10) has 10 elements and cannot be reshaped to (3, 3).
# B = pd.DataFrame(np.arange(0, 10).reshape((3,3)),columns=list('BAC'))
# print(B)

    0   1   2   3   4   5   6   7   8   9
0   0   1   2   3   4   5   6   7   8   9
1  10  11  12  13  14  15  16  17  18  19


In [39]:
import numpy as np
import random
import pandas as pd
# Seed NumPy's global generator. The draws below come from np.random, which
# Python's built-in random.seed() does not affect, so seeding `random` left
# this cell producing different numbers on every run.
np.random.seed(0)
array_a = np.random.randint(6, size=8)
array_b = np.random.randint(6, size=7)

# series_b has no entry for label 8; fill_value=0 makes add() treat the
# missing side as 0 instead of producing NaN for that label.
series_a = pd.Series(array_a, index=[1, 2, 3, 4, 5, 6, 7, 8])
series_b = pd.Series(array_b, index=[1, 2, 3, 4, 5, 6, 7])
series_a.add(series_b, fill_value=0)

1     3.0
2     3.0
3    10.0
4     4.0
5     5.0
6     6.0
7     8.0
8     2.0
dtype: float64

#### Handling Missing data 

In [82]:
# vals1 = np.array([1, None, 3, 4])  
# vals1.sum()
import numpy as np
vals2 = np.array([1, np.nan, 3, 4])

# NaN propagates through ordinary aggregations such as np.sum, whereas the
# NaN-aware variants (np.nansum, np.nanmax, np.nanmin) skip missing
# numerical values.
print(np.nansum(vals2), np.sum(vals2), sep='\n')

8.0
nan


#### Operating on Null Values 

In [92]:
# Both np.nan and None count as missing values in this Series.
data = pd.Series([1, np.nan, 2, None])

# Overwrite every missing entry with 0, in place, via a Boolean mask.
# (data[data.notnull()].sum() would instead sum only the present values.)
missing = data.isnull()
data.loc[missing] = 0

data

0    1.0
1    0.0
2    2.0
3    0.0
dtype: float64

- `isnull()`: Generate a Boolean mask indicating missing values
- `notnull()`: Opposite of `isnull()`
- `dropna()`: Return a filtered version of the data
- `fillna()`: Return a copy of the data with missing values filled or imputed