[Reference](https://towardsdatascience.com/better-pythoning-2-list-comprehensions-including-nlp-examples-with-spacy-88cd76fc6412)

In [1]:
num = 10
for i in range(num):
  print(i + 1) # +1 because Python indexes from 

1
2
3
4
5
6
7
8
9
10


In [2]:
num = 10
[print(i+1) for i in range(num)]

1
2
3
4
5
6
7
8
9
10


[None, None, None, None, None, None, None, None, None, None]

In [3]:
numbers = [i + 1 for i in range(num)]
numbers

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [4]:
# create a list of numbers from 1 to 10

# for loop (3 lines)
numbers = []
for i in range(10):
  numbers.append(i + 1)
  
# list comprehension (1 line)
numbers = [i + 1 for i in range(10)]

In [5]:
# create a set of numbers from 1 to 10
{i+1 for i in range(10)}

# create a dictionary of numbers from 1 to 10 as keys and their squared counterparts as their values
{i+1 : (i+1) ** 2 for i in range(10)}

{1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81, 10: 100}

In [6]:
# student scores out of 100
student_scores = {
  'Alive': 25, 'Aida': 95, 'Aaron': 50, 'Bob': 67, 'Charlie': 85, 'Derek': 20, 'Catrin': 0, 'Terrence': 57
}

# set fail threshold
fail_threshold = 50

# using for loops and if statements (4 lines)
failing_students = []
for student, score in student_scores.items():
  if score <= fail_threshold:
    failing_students.append(student)
    
# using list comprehension (1 line)
failing_students = [student for student, score in student_scores.items() if score <= fail_threshold]

In [7]:
# student scores out of 100
student_scores = {
  'Alive': 25, 'Aida': 95, 'Aaron': 50, 'Bob': 67, 'Charlie': 85, 'Derek': 20, 'Catrin': 0, 'Terrence': 57
}

# set fail threshold
fail_threshold = 50

# using for loops and if statements (6 lines)
has_failed = []
for student, score in student_scores.items():
  if score <= fail_threshold:
    has_failed.append(1)
  else:
    has_failed.append(0)
    
# using for loops and ternary operators  (3 lines)
has_failed = []
for student, score in student_scores.items():
    has_failed.append(1 if score <= fail_threshold else 0)
    
    
# using list comprehension (1 line)
has_failed = [1 if score <= fail_threshold else 0 for student, score in student_scores.items()]

In [8]:
list_of_lists = [ [1,2], [3,4], [5,6] ]

flattened = [
  *list_ for list_ in list_of_lists # i.e. unpack each list_ from the lists in list_of_lists
]

SyntaxError: ignored

In [9]:
list_of_lists = [ [1,2], [3,4], [5,6] ]
flattened = [num for list_ in list_of_lists for num in list_]

In [10]:
flattened

[1, 2, 3, 4, 5, 6]

In [11]:
import spacy

nlp = spacy.load('en_web_core_sm')
docs = nlp.pipe(text_corpus) # text_corpus is simply a list of lists

docs = [clean_data(doc) for doc in docs] # the entire data, cleaned

# functions
def clean_data(doc):
    """
    Received a spacy doc object and cleans it
    """
    doc = remove_stopwords(doc)
    doc = remove_punctuation(doc)
    doc = remove_numbers(doc)
    doc = remove_nonalphabeticals(doc)
    doc = convert_to_str(doc)
    
    return doc

def remove_stopwords(doc):
    """ removes stopwords from a doc """
    return [token for token in doc if not token.is_stop]

def remove_punctuation(doc):
    """ removes punctuation """
    return [token for token in doc if not token.is_punct]

def remove_numbers(doc):
    """ removes 'numbers' """
    return [token for token in doc if not token.like_num]

def remove_nonalphabeticals(doc):
    """ removes non-alphabeticals """
    return [token for token in doc if token.is_alpha]

def remove_entities(doc, ents = ['PERSON', 'ORG', 'GPE']):
    return [token for token in doc if token.ent_type_ not in ents]

def convert_to_str(doc, lemmatize = True):
    """ convert spacy doc objects to string """
    return [token.lemma_.lower() if lemmatize else token.text.lower() for token in doc]

OSError: ignored