In [None]:
Notes: 






Imports

In [7]:
from string import punctuation
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
stopwords = ENGLISH_STOP_WORDS
from itertools import zip_longest
import numpy as np
import pandas as pd
from scipy import stats

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.style.use('fivethirtyeight')




Intro to Python

In [3]:
def lowercase_text(text):
    '''Lower-cases a text string.

    Parameters
    ----------
    text: str

    Returns
    -------
    text_lc: str
        The result of calling ``text.lower()``.

    Examples
    --------
    >>> lowercase_text('Seok-woo, a Fund Manager')
    'seok-woo, a fund manager'
    '''
    text_lc = text.lower()
    return text_lc


def remove_punctuation(text, punctuation=punctuation):
    '''Drops every character of text that appears in punctuation.

    Parameters
    ----------
    text: str
    punctuation: str, optional
        Characters to strip.  Defaults to the module-level ``punctuation``
        name (imported from ``string``) as it was bound when this function
        was defined.

    Returns
    -------
    text_np: str
        The remaining characters, in their original order.

    Examples
    --------
    >>> remove_punctuation('seok-woo, a divorced fund manager,')
    'seokwoo a divorced fund manager'
    '''
    kept_chars = []
    for char in text:
        if char in punctuation:
            continue
        kept_chars.append(char)
    return ''.join(kept_chars)


def remove_newline(text):
    r'''Strips every newline character out of a text string.

    Newlines are deleted, not replaced by a space, so the characters on
    either side of a newline end up adjacent.

    Parameters
    ----------
    text: str

    Returns
    -------
    text_no_nl: str

    Examples
    --------
    >>> remove_newline('\nlife happens when youre busy\n making other plans\n')
    'life happens when youre busy making other plans'
    '''
    pieces = text.split('\n')
    return ''.join(pieces)


def split_text_into_words(text):
    '''Splits a text string into a word list

    Words are separated by runs of whitespace.  Consecutive, leading and
    trailing whitespace never produce empty-string entries in the result.

    Parameters
    ----------
    text: str

    Returns
    -------
    words: list of str
        Empty list if text is empty or contains only whitespace.

    Examples
    --------
    >>> split_text_into_words("get started by stop talking and begin doing")
    ['get', 'started', 'by', 'stop', 'talking', 'and', 'begin', 'doing']
    >>> split_text_into_words(" tell  me ")
    ['tell', 'me']
    '''
    # split() with no separator collapses whitespace runs; split(' ') would
    # yield '' for every extra space, and those empty "words" would survive
    # stop-word removal and end up as doubled spaces in the cleaned line.
    return text.split()


def remove_stopwords(word_lst, stopwords_set):
    '''Filters out the words of word_lst that are members of stopwords_set.

    Parameters
    ----------
    word_lst: list of str
    stopwords_set: set of str
        Words to drop.  Only membership (``in``) is used.

    Returns
    -------
    word_lst_no_sw: list of str
        A new list holding the surviving words in their original order.

    Examples
    --------
    >>> remove_stopwords(['tell', 'me', 'and', 'i', 'forget'], set(['and', 'i']))
    ['tell', 'me', 'forget']
    '''
    kept_words = []
    for token in word_lst:
        if token in stopwords_set:
            continue
        kept_words.append(token)
    return kept_words


def replace_names(word_lst, name_set, replacement_val):
    '''Replaces names in word_lst with replacement_val.  Names are identified in
    the name set.

    A word is replaced only when it matches an entry of name_set exactly;
    every other word is passed through unchanged.

    Parameters
    ----------
    word_lst: list of str
    name_set: set of str
    replacement_val: str
        The string to replace the names with.

    Returns
    -------
    word_lst_replaced_names: list of str
        A new list of the same length as word_lst.

    Examples
    --------
    >>> replace_names(['darryl', 'darryl'], set(['larry', 'darryl']), 'person')
    ['person', 'person']
    >>> replace_names(['daryl', 'larry'], set(['larry', 'darryl']), 'person')
    ['daryl', 'person']
    '''
    return [replacement_val if word in name_set else word
            for word in word_lst]


def create_cleaned_textline_from_words(words):
    '''Joins a list of words into one string, separated by single spaces.

    Parameters
    ----------
    words: list of str

    Returns
    -------
    cleaned_text: str
        Empty string when words is empty.

    Examples
    --------
    >>> create_cleaned_textline_from_words(['darkest', 'moments', 'focus', 'light'])
    'darkest moments focus light'
    '''
    separator = ' '
    return separator.join(words)


def line_cleaning_pipeline(text, stopwords_set, name_set, replace_val):
    '''Runs one raw line of text through every cleaning step, in order.

    The steps are: lower-case, strip punctuation, strip newlines, split into
    words, drop stop words, replace names, and join back into one string.

    Parameters
    ----------
    text: str
        The raw line.
    stopwords_set: set of str
        Passed to remove_stopwords.
    name_set: set of str
        Passed to replace_names.
    replace_val: str
        Passed to replace_names as the replacement string.

    Returns
    -------
    line_of_text_cleaned: str
    '''
    # Character-level cleanup on the raw string.
    flat_text = remove_newline(remove_punctuation(lowercase_text(text)))

    # Word-level cleanup on the token list.
    tokens = split_text_into_words(flat_text)
    tokens = remove_stopwords(tokens, stopwords_set)
    tokens = replace_names(tokens, name_set, replace_val)

    return create_cleaned_textline_from_words(tokens)


if __name__ == '__main__':
    # Sample lines to help test the functions and the pipeline.
    text_str1 = "Seok-woo, a divorced fund manager, is a workaholic and absentee father to \nhis"
    text_str2 = "young daughter, Su-an. For her birthday the next day, she wishes for her father\n"
    text_str3 = "to take her to Busan to see her mother. \nThey board the KTX at Seoul Station."

    # Inputs for this run: swap `text` for text_str2 / text_str3 to try the
    # other sample lines.
    text = text_str1
    replace = 'person'
    # Names are written lower-cased and without punctuation because name
    # replacement happens after those cleaning steps.  Every entry needs its
    # own trailing comma: adjacent string literals are silently concatenated,
    # which previously produced the single bogus name 'yonghukjinhee'.
    names = {'suan', 'seongkyeong', 'yonsuk', 'seokwoo', 'ingil', 'yonghuk',
             'jinhee'}

    # Run each cleaning function by hand, keeping every intermediate result
    # so it can be printed below.
    text_lc = lowercase_text(text)
    text_np = remove_punctuation(text_lc)
    text_nnl = remove_newline(text_np)
    words = split_text_into_words(text_nnl)
    words_nsw = remove_stopwords(words, stopwords)
    words_cleaned = replace_names(words_nsw, names, replace)
    line_of_text_cleaned = create_cleaned_textline_from_words(words_cleaned)

    # Run the same input through the pipeline in one call.
    line_text_pipeline = line_cleaning_pipeline(text, stopwords, names, replace)

    # The step-by-step result and the pipeline result should be identical.
    same_result = line_of_text_cleaned == line_text_pipeline

    # print results
    print(f"Original: {text}")
    print(f"Lowercased: {text_lc}.")
    print(f"Punctuation removed: {text_np}.")
    print(f"Without newlines: {text_nnl}")
    print(f"Into words: {words}")
    print(f"No stop words: {words_nsw}")
    print(f"Replaces names: {words_cleaned}")
    print(f"Lined of cleaned text: {line_of_text_cleaned}")
    print(f"\nDoes pipeline give same result? {same_result}")


Original: Seok-woo, a divorced fund manager, is a workaholic and absentee father to 
his
Lowercased: seok-woo, a divorced fund manager, is a workaholic and absentee father to 
his.
Punctuation removed: seokwoo a divorced fund manager is a workaholic and absentee father to 
his.
Without newlines: seokwoo a divorced fund manager is a workaholic and absentee father to his
Into words: ['seokwoo', 'a', 'divorced', 'fund', 'manager', 'is', 'a', 'workaholic', 'and', 'absentee', 'father', 'to', 'his']
No stop words: ['seokwoo', 'divorced', 'fund', 'manager', 'workaholic', 'absentee', 'father']
Replaces names: ['person', 'divorced', 'fund', 'manager', 'workaholic', 'absentee', 'father']
Lined of cleaned text: person divorced fund manager workaholic absentee father

Does pipeline give same result? True


OOP

In [5]:
class Polynomial(object):
    """A polynomial in one variable, x, with numeric coefficients.

    Coefficients are stored in ``self.coefs`` as a list ordered from the
    constant term upwards, so ``coefs[i]`` is the coefficient of ``x^i``.
    The constructor strips trailing zero coefficients, and the zero
    polynomial is stored as ``[0]``.
    """

    def __init__(self, coefs):
        """Create a Polynomial object
        Parameters
        ----------
        coefs : sequence of numbers
            The coefficients of the polynomial, starting at the constant term,
            so the index of the coefficient corresponds to the power.
            Any iterable (list, tuple, array, ...) is accepted; it is copied
            and never modified.
        """

        # list() both copies the input (so the caller's object is untouched)
        # and guarantees .pop() is available, which a tuple or array slice
        # would not provide.
        coefs = list(coefs)
        # Drop zero coefficients of the highest powers so that the length of
        # the list determines the degree.
        while coefs and coefs[-1] == 0:
            coefs.pop()
        if not coefs:
            coefs = [0]
        self.coefs = coefs

    def __add__(self, other):
        """Add another polynomial, returning a new Polynomial."""
        # zip_longest pads the shorter coefficient list with zeros.
        return Polynomial([
            c1 + c2
            for c1, c2 in zip_longest(self.coefs, other.coefs, fillvalue=0)
        ])

    def __sub__(self, other):
        """Subtract another polynomial, returning a new Polynomial."""
        return self + -other

    def __mul__(self, other):
        """Multiply by another polynomial, returning a new Polynomial."""
        # The highest power in the product is the sum of the two highest
        # indices, (len_a - 1) + (len_b - 1), so len_a + len_b - 1 slots
        # are exactly enough.
        result = [0] * (len(self.coefs) + len(other.coefs) - 1)
        for power_1, coef_1 in enumerate(self.coefs):
            for power_2, coef_2 in enumerate(other.coefs):
                result[power_1 + power_2] += coef_1 * coef_2
        return Polynomial(result)

    def __eq__(self, other):
        """Check if equal to another polynomial.

        Two polynomials are equal when their coefficient lists are equal.
        Comparing with anything that is not a Polynomial returns
        NotImplemented, so ``poly == 3`` evaluates to False instead of
        raising AttributeError.
        """
        if not isinstance(other, Polynomial):
            return NotImplemented
        return self.coefs == other.coefs

    def _sign_string(self, coef, is_largest):
        """The sign part of the string representation of a term,
        used by __str__.

        The leading (largest-power) term gets a bare "-" or nothing; every
        later term gets " + " or " - " so terms are joined with spaces.
        """
        assert coef != 0
        if is_largest and coef > 0:
            return ""
        elif is_largest and coef < 0:
            return "-"
        elif coef > 0:
            return " + "
        else:
            return " - "

    def _coef_string(self, coef, index):
        """The coefficient part of the string representation of a term,
        used by __str__.

        The absolute value is used because the sign is rendered separately.
        A coefficient of magnitude 1 is omitted except on the constant term.
        """
        if abs(coef) == 1 and index != 0:
            return ''
        return str(abs(coef))

    def _var_string(self, index):
        """The variable part of the string representation of a term,
        used by __str__: "" for power 0, "x" for power 1, else "x^<power>"."""
        if index == 0:
            return ""
        elif index == 1:
            return "x"
        else:
            return "x^" + str(index)

    def _term_string(self, coef, index, is_largest):
        """The string representation of a single term,
        used by __str__.  Terms with a zero coefficient render as ""."""
        if coef == 0:
            return ""
        return (self._sign_string(coef, is_largest) +
                self._coef_string(coef, index) +
                self._var_string(index))

    def __str__(self):
        """The string representation of the polynomial, highest power first,
        for example "x^2 - 1".  The zero polynomial renders as "0"."""
        result = ""
        if len(self.coefs) == 1 and self.coefs[0] == 0:
            return "0"
        # Walk from the highest power down to the constant term.
        for i in range(len(self.coefs) - 1, -1, -1):
            result += self._term_string(self.coefs[i],
                                        i,
                                        i == len(self.coefs)-1)
        return result

    def __repr__(self):
        """Constructor-style representation showing the coefficient list."""
        return "Polynomial({})".format(self.coefs)

    def __neg__(self):
        """The negative of the polynomial, as a new Polynomial."""
        return Polynomial([-coef for coef in self.coefs])

    def degree(self):
        """The highest exponent in the polynomial.

        The zero polynomial is stored as ``[0]`` and therefore reports 0.
        """
        return len(self.coefs) - 1

    def evaluate(self, x):
        """Evaluate the polynomial for a specified value of the variable
        Parameters
        ----------
        x : number
            The number to set the variable to.

        Returns
        -------
        number
            The result of evaluating the polynomial.
        """

        result = 0
        for power, coef in enumerate(self.coefs):
            result += coef * x ** power
        return result

    def differentiate(self):
        """ The derivative of a polynomial.

        Returns
        -------
        Polynomial
            The derivative of the polynomial.  The derivative of a constant
            is the zero polynomial.
        """

        # The constant term (index 0) vanishes; the term at index i moves
        # down to index i - 1 with its coefficient multiplied by i.
        return Polynomial([i * self.coefs[i]
                           for i in range(1, len(self.coefs))])

    def integrate(self):
        """ The integral of a polynomial.

        Returns
        -------
        Polynomial
            The integral of the polynomial,
            with the constant term set to zero.  Coefficients are produced
            with true division, so integer inputs yield floats.
        """

        # Leading 0 is the constant of integration; the term at index i moves
        # up to index i + 1 with its coefficient divided by i + 1.
        result = [0]
        for i, coef in enumerate(self.coefs):
            result.append(coef / (i+1))
        return Polynomial(result)


Git Intro

NumPy

In [10]:
# One-dimensional indexing and slicing on the integers 0..9.
x = np.arange(10)  # array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# A single integer index picks out one element.
x[2]  # 2

# start:stop:step -- every second element from index 1 up to, not including, 8.
x[1:8:2]  # array([1, 3, 5, 7])

# Item assignment changes x itself.
x[0] = 100  # x is now array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9])

# A step of -1 walks the array back to front.
x[::-1]  # array([9, 8, 7, 6, 5, 4, 3, 2, 1, 100])


array([1, 3, 5, 7])

In [None]:
# How the nesting of the list literal determines the shape of a 2-D array:
# the outer list holds the rows, each inner list holds one row's columns.
np.array([[10, 11], [14, 15], [18, 19]])
# 3 rows (three inner lists)
# 2 columns (two values per inner list)

np.array([[3, 4, 5]])
# 1 row (one inner list)
# 3 columns

np.array([[3], [4], [5]])
# 3 rows
# 1 column (one value per inner list)


In [None]:
# Two-dimensional indexing and slicing.  x_array must be a NumPy array:
# the comma-separated [rows, columns] form used below raises TypeError on a
# plain list of lists.
x_array = np.array([[0, 1, 2, 3],
                    [4, 5, 6, 7],
                    [8, 9, 10, 11],
                    [12, 13, 14, 15],
                    [16, 17, 18, 19]])

# array([[ 0,  1,  2,  3],
#        [ 4,  5,  6,  7],
#        [ 8,  9, 10, 11],
#        [12, 13, 14, 15],
#        [16, 17, 18, 19]])

# Single element: row 2, column 2.
x_array[2, 2]
# 10

# First two rows, all columns.
x_array[:2, :]
# array([[0, 1, 2, 3],
#        [4, 5, 6, 7]])

# First two rows, first two columns.
x_array[:2, :2]
# array([[0, 1],
#        [4, 5]])


# All rows, first two columns.
x_array[:, :2]
# array([[ 0,  1],
#        [ 4,  5],
#        [ 8,  9],
#        [12, 13],
#        [16, 17]])

# Every second row and every second column.
x_array[::2, ::2]
# array([[ 0,  2],
#        [ 8, 10],
#        [16, 18]])

# All rows, columns from index 2 onwards.
x_array[:, 2:]
# array([[ 2,  3],
#        [ 6,  7],
#        [10, 11],
#        [14, 15],
#        [18, 19]])