In [None]:
from google.colab import drive
drive.mount("/content/gdrive")
%cd /content/gdrive/MyDrive/Colab Notebooks/13 - NLP/

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/MyDrive/Colab Notebooks/13 - NLP


In [None]:
pip install unidecode

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict, Counter
import sys, string, random, re, requests, os, textwrap, unidecode
from bs4 import BeautifulSoup
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, classification_report
import nltk, spacy
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.util import ngrams
from bs4 import BeautifulSoup
import lib.plot_helper as plot_helper
nltk.download(['stopwords', 'punkt', 'wordnet', 'vader_lexicon'])
nlp = spacy.load("en_core_web_sm")
stop_words = stopwords.words('english')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


# Spam Detection (TF-IDF)

## Data Importing

In [None]:
messages = pd.read_csv('data/SMSSpamCollection', sep='\t', names=["label", "message"])
messages['length'] = messages['message'].apply(len)
print(messages.shape)
messages.head()

(5572, 3)


Unnamed: 0,label,message,length
0,ham,"Go until jurong point, crazy.. Available only ...",111
1,ham,Ok lar... Joking wif u oni...,29
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,155
3,ham,U dun say so early hor... U c already then say...,49
4,ham,"Nah I don't think he goes to usf, he lives aro...",61


In [None]:
df_msg = messages[messages['length'] < np.percentile(messages['length'], 99)].copy()
plot_helper.get_histrogram_bi(df_msg, 'label', 'length', bins=100)

## Text Processing

In [None]:
wordnet_lemmatizer = WordNetLemmatizer()
stop_words = stopwords.words('english')
def preprocess_text(text, token=True):
    text = re.sub('[^a-zA-Z]', ' ', text)
    text = unidecode.unidecode(text)
    text = [t.lower() for t in text.split() if len(t) > 2]
    text = [t for t in text if t not in stop_words]
    # text = [t.lemma_ for t in nlp(' '.join(text))]
    # text = [wordnet_lemmatizer.lemmatize(t) for t in text] 
    if token == False:
        text = ' '.join(text)
    return text

In [None]:
messages['message'] = messages['message'].apply(lambda x: preprocess_text(x, False))
messages['text_len'] = messages['message'].apply(len)
messages = messages[messages['text_len'] != 0]
messages.head()

Unnamed: 0,label,message,length,text_len
0,ham,jurong point crazy available bugis great world...,111,72
1,ham,lar joking wif oni,29,18
2,spam,free entry wkly comp win cup final tkts may te...,155,90
3,ham,dun say early hor already say,49,29
4,ham,nah think goes usf lives around though,61,38


## Model Training

In [None]:
# train test split 
msg_train, msg_test, label_train, label_test = train_test_split(messages['message'], messages['label'], test_size=0.2)

**Vectorization**

1. Count how many times does a word occur in each message (Known as term frequency)

2. Weigh the counts, so that frequent tokens get lower weight (inverse document frequency)

3. Normalize the vectors to unit length, to abstract from the original text length (L2 norm)

**TF: Term Frequency**, which measures how frequently a term occurs in a document. Since every document is different in length, it is possible that a term would appear much more times in long documents than shorter ones. Thus, the term frequency is often divided by the document length.

<b>*TF(t) = (Number of times term t appears in a document) / (Total number of terms in the document).*</b>

**IDF: Inverse Document Frequency**, which measures how important a term is. While computing TF, all terms are considered equally important. However it is known that certain terms, such as "is", "of", and "that", may appear a lot of times but have little importance. Thus we need to weigh down the frequent terms while scale up the rare ones, by computing the following: 

<b>*IDF(t) = log_e(Total number of documents / Number of documents with term t in it).*</b>

**Example:**

Consider a document containing 100 words wherein the word cat appears 3 times. <br/>
The term frequency (i.e., tf) for cat is then (3 / 100) = 0.03. Now, assume we have 10 million documents and the word cat appears in one thousand of these. <br/>Then, the inverse document frequency (i.e., idf) is calculated as log(10,000,000 / 1,000) = 4. Thus, the Tf-idf weight is the product of these quantities: 0.03 * 4 = 0.12.

In [None]:
# we can do it in the pipelines
# pipeline = Pipeline([
#     ('tfidf', TfidfVectorizer()),  # integer counts to weighted TF-IDF scores
#     ('classifier', MultinomialNB()),  # train on TF-IDF vectors w/ Naive Bayes classifier
# ])

# pipeline.fit(msg_train,label_train)
# predictions = pipeline.predict(msg_test)
# print(classification_report(predictions,label_test))

In [None]:
# term frequency, term counts per documents/rows
# vectorizer = CountVectorizer(analyzer=text_process)
# msg_train = vectorizer.fit_transform(msg_train)
# msg_test = vectorizer.transform(msg_test)
# vectorizer.vocabulary['bishan']
# print(len(vectorizer.vocabulary))

# TF-IDF scores
# transformer = TfidfTransformer()
# msg_train = transformer.fit_transform(msg_train)
# msg_test = transformer.transform(msg_test)
# print(transformer.idf_[vectorizer.vocabulary_['u']])
# print(transformer.idf_[vectorizer.vocabulary_['bishan']])

# Combine above 2 steps
# ngram_range to include bigram, include top 4000 vocab
tf_vectorizer = TfidfVectorizer(max_features=4000, ngram_range=(1, 3))
msg_train = tf_vectorizer.fit_transform(msg_train)
msg_test = tf_vectorizer.transform(msg_test)

In [None]:
# TF - IDF of train data
print(msg_train[100])

  (0, 3145)	0.5036806945662851
  (0, 1967)	0.4024636709946884
  (0, 2161)	0.5036806945662851
  (0, 314)	0.478181126619128
  (0, 3771)	0.3193388788061832


In [None]:
tf_vectorizer.get_feature_names()[500]

'cash prize claim'

In [None]:
# model = AdaBoostClassifier()
model = MultinomialNB()
model.fit(msg_train, label_train)
print("Classification rate for NB:", model.score(msg_test, label_test))

Classification rate for NB: 0.9566395663956639


In [None]:
pred = model.predict(msg_test)
print(confusion_matrix(pred, label_test))
print(classification_report(pred, label_test))

[[951  48]
 [  0 108]]
              precision    recall  f1-score   support

         ham       1.00      0.95      0.98       999
        spam       0.69      1.00      0.82       108

    accuracy                           0.96      1107
   macro avg       0.85      0.98      0.90      1107
weighted avg       0.97      0.96      0.96      1107



In [None]:
model.coef_[0]

array([-8.77388537, -8.77388537, -8.77388537, ..., -8.1861643 ,
       -8.39654626, -8.39654626])

In [None]:
word_dict = {k:v for v,k in tf_vectorizer.vocabulary_.items()}
# best ham word
print([word_dict[index] for index in model.coef_[0].argsort()[:10]])
# best spam word
print([word_dict[index] for index in model.coef_[0].argsort()[::-1][:10]])

['aathi', 'mood', 'mood came', 'mood came minuts', 'moral', 'moral even', 'moral even god', 'morning', 'morning dear', 'morning love']
['call', 'free', 'txt', 'text', 'stop', 'mobile', 'claim', 'reply', 'www', 'prize']


In [None]:
msg_val = ['You won luckydraw, claim your prize at Bishan Now.']
print(msg_val)
msg_val = tf_vectorizer.transform(msg_val)
pred = model.predict(msg_val)
print(f'\nThis message is {pred}')

['You won luckydraw, claim your prize at Bishan Now.']

This message is ['spam']


# Sentiment Analysis

In [None]:
stop_words = stopwords.words('english')
wordnet_lemmatizer = WordNetLemmatizer()

def preprocess_text(text, token=True):
    text = re.sub('[^a-zA-Z]', ' ', text)
    text = unidecode.unidecode(text)
    text = [t.lower() for t in text.split() if len(t) > 2]
    text = [t for t in text if t not in stop_words]
    # text = [t.lemma_ for t in nlp(' '.join(text))]
    text = [wordnet_lemmatizer.lemmatize(t) for t in text] 
    if token == False:
        text = ' '.join(text)
    return text

In [None]:
positive_reviews = BeautifulSoup(open('data/positive.review').read(), features="html5lib")
positive_reviews = positive_reviews.findAll('review_text', text=True)
positive_reviews = [x.text for x in positive_reviews]
negative_reviews = BeautifulSoup(open('data/negative.review').read(), features="html5lib")
negative_reviews = negative_reviews.findAll('review_text', text=True)
negative_reviews = [x.text for x in negative_reviews]

In [None]:
pos_tokens = [preprocess_text(str(x), False) for x in positive_reviews]
neg_tokens = [preprocess_text(str(x), False) for x in negative_reviews]
full_data = pos_tokens + neg_tokens
y = np.hstack([np.ones(len(pos_tokens)), np.zeros(len(neg_tokens))])

In [None]:
cv = CountVectorizer()
corpus = cv.fit_transform(full_data)
len(cv.vocabulary_)

9201

In [None]:
Xtrain, Xtest, Ytrain, Ytest = train_test_split(corpus, y, test_size=0.25, shuffle=True)
model = LogisticRegression()
model.fit(Xtrain, Ytrain)
print("Train accuracy:", model.score(Xtrain, Ytrain))
print("Test accuracy:", model.score(Xtest, Ytest))

Train accuracy: 0.9993333333333333
Test accuracy: 0.772


In [None]:
word_dict = {k:v for v,k in cv.vocabulary_.items()}
# worst word
print([word_dict[index] for index in model.coef_[0].argsort()[:10]])
# best word
print([word_dict[index] for index in model.coef_[0].argsort()[::-1][:10]])

['returned', 'poor', 'return', 'try', 'waste', 'disappointed', 'almost', 'back', 'tried', 'cheap']
['excellent', 'perfect', 'highly', 'great', 'fast', 'memory', 'best', 'awesome', 'expected', 'love']


#  Sentiment Analysis with Semantics

VADER is an NLTK module that provides sentiment scores based on words used ("completely" boosts a score, while "slightly" reduces it), on capitalization & punctuation ("GREAT!!!" is stronger than "great."), and negations (words like "isn't" and "doesn't" affect the outcome).
<br>To view the source code visit https://www.nltk.org/_modules/nltk/sentiment/vader.html

## Basic

In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

In [None]:
text = 'This was a good movie.'
print(text, sid.polarity_scores(text))

text = 'This was a good movie!'
print(text, sid.polarity_scores(text))

text = 'This was a good movie!!!'
print(text, sid.polarity_scores(text))

text = 'This was the worst film to ever disgrace the screen.'
print(text, sid.polarity_scores(text))

This was a good movie. {'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}
This was a good movie! {'neg': 0.0, 'neu': 0.484, 'pos': 0.516, 'compound': 0.4926}
This was a good movie!!! {'neg': 0.0, 'neu': 0.443, 'pos': 0.557, 'compound': 0.5826}
This was the worst film to ever disgrace the screen. {'neg': 0.477, 'neu': 0.523, 'pos': 0.0, 'compound': -0.8074}


## Classification

In [None]:
df = pd.read_csv('data/amazonreviews.tsv', sep='\t')
df.dropna(inplace=True)
df = df[~df['review'].str.isspace()]
df.head()

Unnamed: 0,label,review
0,pos,Stuning even for the non-gamer: This sound tra...
1,pos,The best soundtrack ever to anything.: I'm rea...
2,pos,Amazing!: This soundtrack is my favorite music...
3,pos,Excellent Soundtrack: I truly like this soundt...
4,pos,"Remember, Pull Your Jaw Off The Floor After He..."


In [None]:
df['scores'] = df['review'].apply(lambda review: sid.polarity_scores(review))
df['compound']  = df['scores'].apply(lambda score_dict: score_dict['compound'])
df['comp_score'] = df['compound'].apply(lambda c: 'pos' if c >=0 else 'neg')
df.head()

Unnamed: 0,label,review,scores,compound,comp_score
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454,pos
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957,pos
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858,pos
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814,pos
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781,pos


In [None]:
print(confusion_matrix(df['label'],df['comp_score']))
print(classification_report(df['label'],df['comp_score']))

[[2623 2474]
 [ 435 4468]]
              precision    recall  f1-score   support

         neg       0.86      0.51      0.64      5097
         pos       0.64      0.91      0.75      4903

    accuracy                           0.71     10000
   macro avg       0.75      0.71      0.70     10000
weighted avg       0.75      0.71      0.70     10000



# Article Spinner

In [None]:
def preprocess_text(text, token=True):
    text = unidecode.unidecode(text)
    text = text.replace('\n', '')
    text = text.replace('.', '')
    text = [t.lower() for t in text.split()]
    text = [wordnet_lemmatizer.lemmatize(t) for t in text]
    return text

In [None]:
positive_reviews = BeautifulSoup(open('data/positive.review').read())
positive_reviews = positive_reviews.findAll('review_text')
positive_reviews = [x.text for x in positive_reviews]
pos_tokens = [preprocess_text(str(x), False) for x in positive_reviews]

negative_reviews = BeautifulSoup(open('data/negative.review').read())
negative_reviews = negative_reviews.findAll('review_text')
negative_reviews = [x.text for x in negative_reviews]
neg_tokens = [preprocess_text(str(x), False) for x in negative_reviews]

data = pos_tokens + neg_tokens

In [None]:
trigram_dict = defaultdict(list)
trigram_prob = defaultdict(list)
for review in data:
    # keys are the first and third words, key = (review(i), review(i + 2))
    # values are the middle word, values = review[i + 1]
    [trigram_dict[(review[i], review[i + 2])].append(review[i + 1]) for i, _w in enumerate(review[:-2])]

# get counts of word for each key
trigram_count = {k : dict(Counter(trigram_dict[k])) for k in trigram_dict.keys()}

# get probabilies of word for each key
for key, values in trigram_count.items():
    trigram_prob[key] = {k : round(v / len(trigram_dict[key]), 3) for k, v in values.items()}

In [None]:
print(trigram_dict['have', 'been'])
print(trigram_count['have', 'been'])
print(trigram_prob['have', 'been'])

['always', 'always', 'finally', 'never', 'never', 'never', 'always', 'also', 'not', 'always', 'not', 'always', 'not']
{'always': 5, 'finally': 1, 'never': 3, 'also': 1, 'not': 3}
{'always': 0.385, 'finally': 0.077, 'never': 0.231, 'also': 0.077, 'not': 0.231}


In [None]:
# Testing Set
review = random.choice(positive_reviews)
print("Original:", review)
review = preprocess_text(review)

for i, _w in enumerate(review[:-2]):
    # 50% chance of replacement
    k = (review[i], review[i + 2])
    if (random.random() < 0.5) & (k in trigram_prob):
        review[i + 1] = sorted(trigram_prob[k].items(), key=lambda item: item[1])[0][0] + f'({review[i + 1]})'

print("\nSpun:")
print(" ".join(review))

Original: 
This charger is designed for every iPod EXCEPT the Shuffle.  The Shuffle connects via USB, this charger uses FireWire.

Every iPod charges while connected to a computer via FireWire or USB 2.0, but this charger allows you to charge your iPod in a wall outlet.  In addition to the charger you will need a connector cable.  You can use the one that came with your iPod or buy a second one.  Hook 'em up, plug it in the wall and get on with more important things while your iPod charges.

For international travelers, the plug pops off and you can purchase the correct plug for the country you will be visiting.  Great feature!

I have an iPod Dock connected to my stereo and use an extra connection cable along with this plug to ensure that my iPod doesn't run of of juice durring parites and long playing sessions.  I highly recommend the Power Adapter to iPod users who want the flexability to use their iPod on the road or in conjunction with a stereo


Spun:
this charger is(is) designed

# Cipher Decryption (GA and Bigram Prob)

## Configuration

In [None]:
# download the file
if not os.path.exists('./data/moby_dick.txt'):
    r = requests.get('https://lazyprogrammer.me/course_files/moby_dick.txt')
    with open('./data/moby_dick.txt', 'w') as f:
        f.write(r.content.decode())

In [None]:
# one will act as the key, other as the value
letters1 = list(string.ascii_lowercase)
letters2 = list(string.ascii_lowercase)
random.shuffle(letters2)
true_mapping = {k : v for k, v in zip(letters1, letters2)}
# unigram prob
pi = np.zeros(26)
# bigram prob
M_bi = np.ones((26, 26))

In [None]:
def update_pi(ch):
    '''To update the initial state distribution for first letter, unigram'''
    i = ord(ch) - 97
    pi[i] += 1

def update_transition(ch1, ch2):
    '''To update the Markov matrix, bigram'''
    # ord('a') = 97, ord('b') = 98, ...
    i = ord(ch1) - 97
    j = ord(ch2) - 97
    M_bi[i, j] += 1

def get_word_prob(word):
    '''to get the log-probability of a word / token'''
    i = ord(word[0]) - 97
    logp = np.log(pi[i])
    
    for ch in word[1:]:
        j = ord(ch) - 97
        logp += np.log(M_bi[i, j])
        i = j

    return logp

def get_sequence_prob(words):
    '''To get the probability of a sequence of words'''
    # if input is a string, split into an array of tokens
    if type(words) == str:
        words = words.split()
    logp = 0
    for word in words:
        logp += get_word_prob(word)
    return logp

def encode_message(msg):
    '''encode Message'''
    msg = msg.lower()
    msg = regex.sub(' ', msg)

  # make the encoded message
    coded_msg = []
    for ch in msg:
        # if character is non-alpha then it remains
        if ch in true_mapping:
            coded_ch = true_mapping[ch]
        else:
            coded_ch = ch
        coded_msg.append(coded_ch)
    return ''.join(coded_msg)

# a function to decode a message
def decode_message(msg, word_map):
    '''decode Message'''
    decoded_msg = []
    for ch in msg:
        if ch in word_map:
            decoded_ch = word_map[ch]
        else:
            decoded_ch = ch
        decoded_msg.append(decoded_ch)

    return ''.join(decoded_msg)

## Probabilistic Model Learning

In [None]:
# training for probabilities
regex = re.compile('[^a-zA-Z]')
for line in open('./data/moby_dick.txt'):
    line = line.rstrip()
    
    # if line is not blank, replace non-alpha chatacters with space
    if line:
        line = regex.sub(' ', line)
        tokens = line.lower().split()
    
    for token in tokens:
        # first letter
        ch0 = token[0]
        update_pi(ch0)

        for ch1 in token[1:]:
            update_transition(ch0, ch1)
            ch0 = ch1

# normalize the probabilities
pi /= pi.sum()
M_bi /= M_bi.sum(axis=1, keepdims=True)

## Encoding

In [None]:
original_message = '''
I then lounged down the street and found,
as I expected, that there was a mews in a lane which runs down
by one wall of the garden. I lent the ostlers a hand in rubbing
down their horses, and received in exchange twopence, a glass of
half-and-half, two fills of shag tobacco, and as much information
as I could desire about Miss Adler, to say nothing of half a dozen
other people in the neighbourhood in whom I was not in the least
interested, but whose biographies I was compelled to listen to.
'''

In [None]:
# encode the message
encoded_message = encode_message(original_message)
# to decode
decode_message(encoded_message, {y:x for x, y in true_mapping.items()})

' i then lounged down the street and found  as i expected  that there was a mews in a lane which runs down by one wall of the garden  i lent the ostlers a hand in rubbing down their horses  and received in exchange twopence  a glass of half and half  two fills of shag tobacco  and as much information as i could desire about miss adler  to say nothing of half a dozen other people in the neighbourhood in whom i was not in the least interested  but whose biographies i was compelled to listen to  '

## Evolutionary algorithm

In [None]:
def evolve_offspring(dna_pool, n_children):
    '''to create offspring'''
    offspring = []
    
    for dna in dna_pool:
        for _ in range(n_children):
            copy = dna.copy()
            j = np.random.randint(len(copy))
            k = np.random.randint(len(copy))

            # switch position of alphabet
            tmp = copy[j]
            copy[j] = copy[k]
            copy[k] = tmp
            offspring.append(copy)

    return offspring + dna_pool

In [None]:
# initialization
num_iters = 300
scores = np.zeros(num_iters)
best_dna = None
best_map = None
best_score = -np.inf

dna_pool = []
for count in range(50):
    dna = list(string.ascii_lowercase)
    random.shuffle(dna)
    dna_pool.append(dna)

In [None]:
for i in range(num_iters):
    # get offspring from the current dna pool
    # skip this during first run
    if i > 0:
        dna_pool = evolve_offspring(dna_pool, 5)
 
    # calculate score for each dna
    dna2score = {}
    for dna in dna_pool:
        current_map = {k : v for k, v in zip(letters1, dna)}
        decoded_message = decode_message(encoded_message, current_map)
        score = get_sequence_prob(decoded_message)
        # store the score for each map
        dna2score[''.join(dna)] = score
        
        # record the best score
        if score > best_score:
            best_dna = dna
            best_map = current_map
            best_score = score
 
    # store average score of current generation
    scores[i] = np.mean(list(dna2score.values()))
    # keep the best 5 dna from dna_pool (20)
    sorted_dna = sorted(dna2score.items(), key=lambda x: x[1], reverse=True)
    dna_pool = [list(k) for k, v in sorted_dna[:10]]
    
    if i % 100 == 0:
        print("iter:", i, "score:", scores[i], "best so far:", best_score)

iter: 0 score: -2104.098434646843 best so far: -1756.0124422930269
iter: 100 score: -1027.9556394134784 best so far: -931.7456652722506
iter: 200 score: -1054.4234061028674 best so far: -929.9156356016822


In [None]:
# decode message using best map
decoded_message = decode_message(encoded_message, best_map)
print("LL of decoded message:", get_sequence_prob(decoded_message))
print("LL of true message:", get_sequence_prob(regex.sub(' ', original_message.lower())))

# which letters are wrong?
for true, v in true_mapping.items():
    pred = best_map[v]
    if true != pred:
        print("true: %s, pred: %s" % (true, pred))

LL of decoded message: -929.9156356016822
LL of true message: -933.4784822058451
true: k, pred: z
true: z, pred: k


In [None]:
# print the final decoded message
print("Decoded message:\n", textwrap.fill(decoded_message))
print("\nTrue message:\n", original_message)

Decoded message:
  i then lounged down the street and found  as i expected  that there
was a mews in a lane which runs down by one wall of the garden  i lent
the ostlers a hand in rubbing down their horses  and received in
exchange twopence  a glass of half and half  two fills of shag tobacco
and as much information as i could desire about miss adler  to say
nothing of half a doken other people in the neighbourhood in whom i
was not in the least interested  but whose biographies i was compelled
to listen to

True message:
 
I then lounged down the street and found,
as I expected, that there was a mews in a lane which runs down
by one wall of the garden. I lent the ostlers a hand in rubbing
down their horses, and received in exchange twopence, a glass of
half-and-half, two fills of shag tobacco, and as much information
as I could desire about Miss Adler, to say nothing of half a dozen
other people in the neighbourhood in whom I was not in the least
interested, but whose biographies I wa