### Practice making functions

In [1]:
def say_hello():
    """Print the fixed greeting ``Hello!`` to standard output."""
    greeting = "Hello!"
    print(greeting)

In [3]:
# Call the function with no arguments; it prints its greeting.
say_hello()

Hello!


In [4]:
def say_something(something):
    """Print ``something`` to standard output.

    Args:
        something: The object to show; it is handed to ``print`` unchanged.
    """
    message = something
    print(message)

In [5]:
# Pass a string literal; the function prints whatever it receives.
say_something("What?")

What?


In [6]:
# The argument can also be a variable that holds the string.
Jane_says = "Hi, my name is Jane. I'm learning Python!"
say_something(Jane_says)

Hi, my name is Jane. I'm learning Python!


### Using the map() function

In [7]:
# A plain Python list of letter grades
my_grades = ["B","C","B","D"]

In [8]:
import pandas as pd

# Convert my_grades into a pandas Series.
# Note: this rebinds the name my_grades from the list to the Series.
my_grades = pd.Series(my_grades)
my_grades

0    B
1    C
2    B
3    D
dtype: object

In [9]:
# Use map with a dict to raise each grade by one letter: every value in the
# Series is looked up as a key and replaced by the matching dict value.
# The result is a new Series; it is only displayed here, not stored.
my_grades.map({"B":"A", "C":"B", "D":"C"})

0    A
1    B
2    A
3    C
dtype: object

In [10]:
my_grades
# Note - the original series has not been altered here because the variable my_grades was not reassigned to the mapped output.

0    B
1    C
2    B
3    D
dtype: object

### Using the format() function

In [12]:
# Numeric grades as a plain Python list of floats
my_num_grades = [92.34, 84.56, 86.78, 98.32]

# '{:.0f}' formats a float with zero digits after the decimal point, so each
# grade prints as a rounded whole number.
for grade in my_num_grades:
    print('{:.0f}'.format(grade))

92
85
87
98


In [13]:
# A format spec such as ":.1f" is only valid inside a format string. Written
# bare after a variable in a call, it is a SyntaxError.
# The broken snippet is compiled from a string so that the error can be caught
# and shown; a real SyntaxError in the cell would stop "Restart & Run All".
bad_snippet = "print(grade:.1f)"
try:
    compile(bad_snippet, "<format-spec-demo>", "exec")
except SyntaxError as err:
    print(f"SyntaxError: {err.msg}")

SyntaxError: invalid syntax (<ipython-input-13-fe84bc9789bd>, line 3)

In [14]:
# But an f-string works: the ":.1f" format spec goes inside the braces, after the variable name.
for grade in my_num_grades:
    print(f'{grade:.1f}')

92.3
84.6
86.8
98.3


### Chaining map() and format() functions

In [15]:
# Convert the list of numeric grades into a pandas Series.
# Note: this rebinds my_num_grades from the list to the Series.
my_num_grades = pd.Series(my_num_grades)
my_num_grades

0    92.34
1    84.56
2    86.78
3    98.32
dtype: float64

In [16]:
# Pass the bound str.format method itself to map: it is called once per value,
# so the result holds formatted strings (dtype object), not floats.
my_num_grades.map("{:.0f}".format)

0    92
1    85
2    87
3    98
dtype: object

### Classes and Objects

In [17]:
class Cat:
    """A cat that is identified only by its name."""

    def __init__(self, name):
        """Store ``name`` on the new instance as ``self.name``."""
        self.name = name


# Create one Cat and read its attribute back to confirm it was stored.
first_cat = Cat("Felix")
print(first_cat.name)

Felix


In [18]:
class Dog:
    """A dog described by its name, its color, and the sound it makes."""

    def __init__(self, name, color, sound):
        """Record the three constructor arguments as instance attributes."""
        self.name, self.color, self.sound = name, color, sound

    def bark(self):
        """Return the dog's sound twice, separated by a single space."""
        return " ".join((self.sound, self.sound))

# Create a Dog, print two of its attributes, then call its method.
# bark() returns a string rather than printing it, so as the last expression
# in the cell it is shown as the cell's output value.
first_dog = Dog("Fido", "brown","woof!")
print(first_dog.name)
print(first_dog.color)
first_dog.bark()

Fido
brown


'woof! woof!'

### Binning

In [20]:
# Use data series from school district analysis
import pandas as pd

# Relative path of the CSV file that is loaded in the next cell.
school_data_to_load = "Resources/schools_complete.csv"
# NOTE(review): the original note said the DataFrame must be created in the next cell because doing it
# in this cell raised an error. Nothing in this cell explains such an error - TODO confirm it still reproduces.

In [21]:
# Read the CSV into a DataFrame and display it.
school_data_df = pd.read_csv(school_data_to_load)
school_data_df

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500
5,5,Wilson High School,Charter,2283,1319574
6,6,Cabrera High School,Charter,1858,1081356
7,7,Bailey High School,District,4976,3124928
8,8,Holden High School,Charter,427,248087
9,9,Pena High School,Charter,962,585858


In [22]:
# Make series for per capita budget: index the frame by school name once,
# then divide the "budget" column by the "size" column.
per_capita = (
    school_data_df
    .set_index(["school_name"])
    .pipe(lambda schools: schools["budget"] / schools["size"])
)
per_capita

school_name
Huang High School        655.0
Figueroa High School     639.0
Shelton High School      600.0
Hernandez High School    652.0
Griffin High School      625.0
Wilson High School       578.0
Cabrera High School      582.0
Bailey High School       628.0
Holden High School       581.0
Pena High School         609.0
Wright High School       583.0
Rodriguez High School    637.0
Johnson High School      650.0
Ford High School         644.0
Thomas High School       638.0
dtype: float64

In [25]:
# Using qcut: split per_capita into q=4 quantile-based bins (quartiles).
# Each school is labelled with the interval its value falls into.
pd.qcut(per_capita, q=4)

school_name
Huang High School          (641.5, 655.0]
Figueroa High School       (628.0, 641.5]
Shelton High School        (591.5, 628.0]
Hernandez High School      (641.5, 655.0]
Griffin High School        (591.5, 628.0]
Wilson High School       (577.999, 591.5]
Cabrera High School      (577.999, 591.5]
Bailey High School         (591.5, 628.0]
Holden High School       (577.999, 591.5]
Pena High School           (591.5, 628.0]
Wright High School       (577.999, 591.5]
Rodriguez High School      (628.0, 641.5]
Johnson High School        (641.5, 655.0]
Ford High School           (641.5, 655.0]
Thomas High School         (628.0, 641.5]
dtype: category
Categories (4, interval[float64]): [(577.999, 591.5] < (591.5, 628.0] < (628.0, 641.5] < (641.5, 655.0]]

In [26]:
# Count how many schools fall into each quartile bin.
# observed=False is passed explicitly: grouping by a categorical without it
# emits a FutureWarning on pandas >= 2.1, and False keeps the long-standing
# behavior of listing every bin.
per_capita.groupby(pd.qcut(per_capita, q=4), observed=False).count()

(577.999, 591.5]    4
(591.5, 628.0]      4
(628.0, 641.5]      3
(641.5, 655.0]      4
dtype: int64

In [35]:
# Count how many schools fall into each of five quantile bins (quintiles).
# observed=False is passed explicitly: grouping by a categorical without it
# emits a FutureWarning on pandas >= 2.1, and False keeps the long-standing
# behavior of listing every bin.
per_capita.groupby(pd.qcut(per_capita, q=5), observed=False).count()

(577.999, 582.8]    3
(582.8, 618.6]      3
(618.6, 637.4]      3
(637.4, 645.2]      3
(645.2, 655.0]      3
dtype: int64

In [28]:
# Describe vs info: describe() returns summary statistics (count, mean, std, min, quartiles, max).
per_capita.describe()

count     15.000000
mean     620.066667
std       28.544368
min      578.000000
25%      591.500000
50%      628.000000
75%      641.500000
max      655.000000
dtype: float64

In [31]:
# Describe vs info: Series.info() was only added in pandas 1.4, so on older
# versions this call raises AttributeError. Catch it and print the message so
# the demonstration does not halt "Restart & Run All".
try:
    per_capita.info()
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: 'Series' object has no attribute 'info'

In [33]:
# Use .info on data frames! It prints the index range, each column's non-null count and dtype, and the memory usage.
school_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   School ID    15 non-null     int64 
 1   school_name  15 non-null     object
 2   type         15 non-null     object
 3   size         15 non-null     int64 
 4   budget       15 non-null     int64 
dtypes: int64(3), object(2)
memory usage: 728.0+ bytes


In [34]:
# describe() on a DataFrame summarizes its numeric columns (here: School ID, size, budget).
school_data_df.describe()

Unnamed: 0,School ID,size,budget
count,15.0,15.0,15.0
mean,7.0,2611.333333,1643295.0
std,4.472136,1420.915282,934776.3
min,0.0,427.0,248087.0
25%,3.5,1698.0,1046265.0
50%,7.0,2283.0,1319574.0
75%,10.5,3474.0,2228999.0
max,14.0,4976.0,3124928.0


In [36]:
# cut needs one-dimensional input, so it is applied to a Series here.
# The bin edges are given explicitly, producing the bins (0, 625] and (625, 1000].
bins = [0,625,1000]
pd.cut(per_capita,bins)

school_name
Huang High School        (625, 1000]
Figueroa High School     (625, 1000]
Shelton High School         (0, 625]
Hernandez High School    (625, 1000]
Griffin High School         (0, 625]
Wilson High School          (0, 625]
Cabrera High School         (0, 625]
Bailey High School       (625, 1000]
Holden High School          (0, 625]
Pena High School            (0, 625]
Wright High School          (0, 625]
Rodriguez High School    (625, 1000]
Johnson High School      (625, 1000]
Ford High School         (625, 1000]
Thomas High School       (625, 1000]
dtype: category
Categories (2, interval[int64]): [(0, 625] < (625, 1000]]

In [37]:
# cut also works on a single DataFrame column, because a column is itself a Series;
# it cannot take a whole DataFrame.
bins2 = [0,1000,5000]
pd.cut(school_data_df["size"],bins2)

0     (1000, 5000]
1     (1000, 5000]
2     (1000, 5000]
3     (1000, 5000]
4     (1000, 5000]
5     (1000, 5000]
6     (1000, 5000]
7     (1000, 5000]
8        (0, 1000]
9        (0, 1000]
10    (1000, 5000]
11    (1000, 5000]
12    (1000, 5000]
13    (1000, 5000]
14    (1000, 5000]
Name: size, dtype: category
Categories (2, interval[int64]): [(0, 1000] < (1000, 5000]]