In [1]:
import pandas as pd
import nltk  
nltk.download('stopwords') 
from nltk.corpus import stopwords 
from nltk.stem.porter import PorterStemmer 
from wordcloud import WordCloud, STOPWORDS
from string import punctuation
from collections import Counter
from nltk.stem.porter import *
import itertools
import re
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
from torch import optim

[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Detect CUDA once; later cells read this flag to decide whether to move
# the model and tensors to the GPU.
train_on_gpu = torch.cuda.is_available()
device_message = "GPU is available" if train_on_gpu else "GPU is not available, CPU is being used instead"
print(device_message)

GPU is available


In [3]:
# Load the train and test splits from the "nlp-getting-started" input folder.
train_df = pd.read_csv("../input/nlp-getting-started/train.csv")
test_df = pd.read_csv("../input/nlp-getting-started/test.csv")

# Report (rows, columns) of the training frame, then preview its first rows.
print("The csv shape: ", str(train_df.shape))
train_df.head()


The csv shape:  (7613, 5)


Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [4]:
# Preview the first rows of the test split.
test_df.head()

Unnamed: 0,id,keyword,location,text
0,0,,,Just happened a terrible car crash
1,2,,,"Heard about #earthquake is different cities, s..."
2,3,,,"there is a forest fire at spot pond, geese are..."
3,9,,,Apocalypse lighting. #Spokane #wildfires
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan


In [5]:
# Show one raw tweet (row label 1) together with its target label.
print(" Example text: ", str(train_df["text"][1]), "\n", "Target: ", str(train_df["target"][1]))

 Example text:  Forest fire near La Ronge Sask. Canada 
 Target:  1


In [6]:
# Summarise missing values per column: absolute count and fraction of rows.
null_mask = train_df.isnull()
total = null_mask.sum().sort_values(ascending = False)
# The mean of a boolean mask is (number of True) / (number of rows).
percent = null_mask.mean()

# concat aligns both Series on the column-name index; rows follow `total`'s order.
missing_data = pd.concat([total, percent], axis = 1, keys = ["total", "percent"])
missing_data


Unnamed: 0,total,percent
location,2533,0.33272
keyword,61,0.008013
target,0,0.0
text,0,0.0
id,0,0.0


In [7]:
# Keep only the tweet text (plus the target in the training frame).
unused_columns = ["keyword", "location", "id"]
train_df = train_df.drop(columns = unused_columns)
test_df = test_df.drop(columns = unused_columns)
print("Keyword, Location, and id are all dropped successfully")



Keyword, Location, and id are all dropped successfully


In [8]:
# Hand-written sample tweet used to demonstrate each preprocessing step below
# (it contains punctuation, numbers, URLs, stopwords and a hashtag).
phrase = "Hello,,?? This is my Tweet number 17!!!. I also wanted or been wanting you to try reading this tweet https://www.youtube.com and \
also be writing or seeing https://relentless.com. This is my 28th and and maybe 32nd time seeing this. #ThisisFun"
# Set of punctuation characters, giving remove_punct fast membership tests.
punct_list = set(punctuation)


def remove_punct(text):
    """Return `text` with every character contained in `punct_list` removed."""
    kept_chars = [char for char in text if char not in punct_list]
    return "".join(kept_chars)

def remove_stopwords(text):
    """Split `text` on single spaces and return the tokens not in STOPWORDS.

    Because the split is on a single space, consecutive spaces produce
    empty-string tokens; those are returned as-is unless STOPWORDS
    contains the empty string.
    """
    return [token for token in text.split(" ") if token not in STOPWORDS]

def remove_http(text_list):
    """Return the tokens of `text_list` that do not contain the substring "http"."""
    return [token for token in text_list if "http" not in token]

def stem_porter(text_list):
    """Stem every token in `text_list` with a fresh PorterStemmer and return the list."""
    stem = PorterStemmer().stem
    return list(map(stem, text_list))

def change_number(text_list):
    """Replace every token that contains at least one digit with "||Numeric||".

    Tokens without digits are passed through unchanged, and order is preserved.
    """
    contains_digit = re.compile(r'\d').search
    return ["||Numeric||" if contains_digit(token) else token for token in text_list]

# Run the whole chain on the sample phrase: lowercase -> strip punctuation ->
# drop stopwords -> drop URL tokens -> stem -> mask tokens containing digits.
change_number(stem_porter(remove_http(remove_stopwords(remove_punct(phrase.lower())))))

['hello',
 'tweet',
 'number',
 '||Numeric||',
 'want',
 'want',
 'tri',
 'read',
 'tweet',
 'write',
 'see',
 '||Numeric||',
 'mayb',
 '||Numeric||',
 'time',
 'see',
 'thisisfun']

In [9]:
#Explains what stem does
# Build the un-stemmed token list once and reuse it for both printouts.
unstemmed_tokens = remove_http(remove_stopwords(remove_punct(phrase.lower())))

print("Before stem: ")
print(unstemmed_tokens)

print("After stem: ")
print(stem_porter(unstemmed_tokens))

Before stem: 
['hello', 'tweet', 'number', '17', 'wanted', 'wanting', 'try', 'reading', 'tweet', 'writing', 'seeing', '28th', 'maybe', '32nd', 'time', 'seeing', 'thisisfun']
After stem: 
['hello', 'tweet', 'number', '17', 'want', 'want', 'tri', 'read', 'tweet', 'write', 'see', '28th', 'mayb', '32nd', 'time', 'see', 'thisisfun']


In [10]:
def preprocess_text(text):
    """Turn a raw tweet string into a list of cleaned tokens.

    Steps, in order: lowercase, strip punctuation, drop stopwords, drop
    tokens containing "http", Porter-stem, and replace tokens containing
    digits with "||Numeric||".
    """
    cleaned = remove_punct(text.lower())
    tokens = remove_stopwords(cleaned)
    tokens = remove_http(tokens)
    tokens = stem_porter(tokens)
    return change_number(tokens)

# Smoke-test on the sample phrase, then tokenize every tweet in both frames.
# NOTE: this overwrites the "text" column in place, so re-running the cell
# would feed token lists (not strings) into preprocess_text.
preprocess_text(phrase)
train_df["text"] = train_df["text"].apply(preprocess_text)
test_df["text"] = test_df["text"].apply(preprocess_text)

In [11]:
#The new preprocessed data
# Each "text" entry is now a list of tokens produced by preprocess_text.
print(train_df["text"])
train_df.head()

0       [deed, reason, earthquak, may, allah, forgiv, us]
1            [forest, fire, near, la, rong, sask, canada]
2       [resid, ask, shelter, place, notifi, offic, ev...
3       [||Numeric||, peopl, receiv, wildfir, evacu, o...
4       [got, sent, photo, rubi, alaska, smoke, wildfi...
                              ...                        
7608    [two, giant, crane, hold, bridg, collaps, near...
7609    [ariaahrari, thetawniest, control, wild, fire,...
7610    [||Numeric||, ||Numeric||, ||Numeric||, s, vol...
7611    [polic, investig, ebik, collid, car, littl, po...
7612    [latest, home, raze, northern, california, wil...
Name: text, Length: 7613, dtype: object


Unnamed: 0,text,target
0,"[deed, reason, earthquak, may, allah, forgiv, us]",1
1,"[forest, fire, near, la, rong, sask, canada]",1
2,"[resid, ask, shelter, place, notifi, offic, ev...",1
3,"[||Numeric||, peopl, receiv, wildfir, evacu, o...",1
4,"[got, sent, photo, rubi, alaska, smoke, wildfi...",1


In [12]:
##### Turn the text into numbers

# Count how many times each token occurs across all training tweets.
freq = {}
for token_list in train_df["text"]:
    for token in token_list:
        freq[token] = freq.get(token, 0) + 1



In [13]:
# Rank tokens from most to least frequent.
sorted_freq = sorted(freq, key = freq.get, reverse = True)
# remove_stopwords splits on single spaces, which can leave an empty-string
# token behind. Drop it when present; an unconditional list.remove('') would
# raise ValueError on data that happens to contain no empty token.
if '' in sorted_freq:
    sorted_freq.remove('')

#Now we have to tokenize
# Ids start at 1 so that 0 stays free for padding / out-of-vocabulary tokens.
vocab2int = {word: ii for ii, word in enumerate(sorted_freq, start = 1)}
int2vocab = {ii: word for word, ii in vocab2int.items()}

# Preview the 100 most frequent tokens and their ids.
print(dict(itertools.islice(vocab2int.items(), 100)))

{'||Numeric||': 1, 'fire': 2, 'amp': 3, 'im': 4, 'will': 5, 'bomb': 6, 'new': 7, 'via': 8, 'now': 9, 'one': 10, 'peopl': 11, 'go': 12, 'dont': 13, 'news': 14, 'burn': 15, 'kill': 16, 'video': 17, 'flood': 18, 'us': 19, 'emerg': 20, 'crash': 21, 'time': 22, 'disast': 23, 'attack': 24, 'bodi': 25, 'build': 26, 'year': 27, 'look': 28, 'say': 29, 'polic': 30, 'fatal': 31, 'home': 32, 'day': 33, 'love': 34, 'famili': 35, 'evacu': 36, 'train': 37, 'make': 38, 'still': 39, 'come': 40, 'see': 41, 'storm': 42, 'california': 43, 'back': 44, 'know': 45, 'suicid': 46, 'want': 47, 'watch': 48, 'collaps': 49, 'world': 50, 'live': 51, 'bag': 52, 'scream': 53, 'derail': 54, 'got': 55, 'car': 56, 'death': 57, 'man': 58, 'first': 59, 'rt': 60, 'take': 61, 'think': 62, 'caus': 63, 'cant': 64, 'need': 65, 'nuclear': 66, 'work': 67, 'wreck': 68, 'war': 69, 'drown': 70, 'two': 71, 'today': 72, 'youtub': 73, 'destroy': 74, 'accid': 75, 'let': 76, 'deton': 77, 'dead': 78, 'feel': 79, 'plan': 80, 'hijack': 81,

In [14]:
# preprocess_text applies the same lowercase/punct/stopword/http/stem/number chain.
phrase_list = preprocess_text(phrase)
print("Before: ", phrase_list)
def tokenize_text(text_list):
    """Map each token in `text_list` to its integer id from `vocab2int`.

    Tokens that are not in the vocabulary are encoded as 0. A dictionary
    lookup with a default is used instead of a bare ``except:`` so that
    unrelated errors (for example an unhashable token) are not silently
    turned into 0.

    Returns:
        list[int]: one id per input token, in the same order.
    """
    return [vocab2int.get(word, 0) for word in text_list]

# Encode the sample phrase; tokens missing from the vocabulary show up as 0.
print("After: ", str(tokenize_text(phrase_list)))

# Second check with a mix of frequent tokens and strings that are not in the vocabulary.
print("Second test: ")
common_list = ["fire", "bomb", "somemorerandom,,!!", "im", "first", "jibberish", "the", "||Numeric||"]
print(tokenize_text(common_list))

Before:  ['hello', 'tweet', 'number', '||Numeric||', 'want', 'want', 'tri', 'read', 'tweet', 'write', 'see', '||Numeric||', 'mayb', '||Numeric||', 'time', 'see', 'thisisfun']
After:  [1299, 556, 647, 1, 47, 47, 164, 128, 556, 993, 41, 1, 596, 1, 22, 41, 0]
Second test: 
[2, 6, 0, 4, 59, 0, 0, 1]


In [15]:
# Replace each token list with its list of integer ids (overwrites "text" in place).
train_df["text"] = train_df["text"].apply(tokenize_text)

In [16]:
# Inspect the first ten rows after integer encoding.
train_df.head(10)

Unnamed: 0,text,target
0,"[3641, 469, 215, 93, 1471, 2883, 19]",1
1,"[141, 2, 186, 504, 5232, 5233, 1026]",1
2,"[1345, 505, 1810, 373, 5234, 220, 36, 1810, 37...",1
3,"[1, 11, 2401, 99, 36, 331, 43, 0]",1
4,"[55, 1027, 142, 3642, 1620, 197, 99, 2402, 123...",1
5,"[2403, 198, 0, 43, 1346, 1, 332, 782, 486, 867...",1
6,"[18, 23, 708, 175, 63, 709, 18, 438, 5235, 868...",1
7,"[4, 187, 1098, 41, 2, 1811]",1
8,"[257, 20, 36, 199, 9, 26, 710, 438]",1
9,"[4, 2064, 334, 40, 241]",1


In [17]:
# Length of the longest encoded tweet (0 if there are no rows).
max_length = max((len(tweet) for tweet in train_df["text"]), default = 0)

print("Max Length: ", str(max_length))
    

Max Length:  51


In [18]:
def pad_text(int_list, sequence_length = 52):
    """Left-pad (or truncate) a list of token ids to a fixed length.

    Args:
        int_list: sequence of integer token ids.
        sequence_length: length of the returned array.

    Returns:
        numpy.ndarray of shape ``(sequence_length,)`` and integer dtype.
        Shorter inputs are right-aligned with zeros in front; longer inputs
        keep their first ``sequence_length`` ids. An empty input yields an
        all-zero array.
    """
    padded_list = np.zeros((sequence_length), dtype = int)
    tokens = np.asarray(int_list, dtype = int)[:sequence_length]

    # Guard the empty case explicitly: `padded_list[-0:]` selects the WHOLE
    # array, so assigning an empty array to it raises a broadcast error.
    if tokens.size > 0:
        padded_list[-tokens.size:] = tokens

    return padded_list


# Encode the sample phrase and show its zero-padded, fixed-length form.
int_list = tokenize_text(phrase_list)
print(pad_text(int_list))


[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0 1299  556  647    1   47   47  164
  128  556  993   41    1  596    1   22   41    0]


In [19]:
#We have some rows we need to drop or else the pad won't work
# Collect the index LABELS of tweets that ended up with no tokens.
# DataFrame.drop is label-based, so using labels (rather than positions from
# enumerate) stays correct even when the index is not a pristine 0..n-1 range,
# for example after rows were already removed.
empty_mask = train_df["text"].apply(len) == 0
list_of_empty_rows = train_df.loc[empty_mask].index.tolist()

print(list_of_empty_rows)
train_df = train_df.drop(list_of_empty_rows)
print("Rows have been dropped")

[6594, 6597, 6602, 6618, 6620, 6623, 6626]
Rows have been dropped


In [20]:
# Pad every encoded tweet to pad_text's default fixed length (overwrites "text" in place).
train_df["text"] = train_df["text"].apply(pad_text)

In [21]:
# Spot-check a few padded rows (looked up by index label) and confirm the
# padded length.
print("First Review")
print(train_df["text"][0][:])

print("Next Review")
print(train_df["text"][1028][:])

print("Next Review")
print(train_df["text"][456][:])

print("Length of review: ", str(len(train_df["text"][906])))

First Review
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0 3641  469  215   93 1471 2883   19]
Next Review
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0   45  513   25
  896    1   52 3824 3100 1303 1472 3101 6402 3101]
Next Review
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0 5688
    0   19 1185   24   16    1  730  532 5689 5690]
Length of review:  52


## Now I have to create a generator and also split into Valid and Train Data

In [22]:
def get_text(csv_column):
    """Return the elements of `csv_column` as a plain Python list, in order."""
    return [row for row in csv_column]

def get_target(csv_column):
    """Wrap each element of `csv_column` in a one-item list.

    Returns a list of shape (n, 1), e.g. [1, 0] -> [[1], [0]].
    """
    return [[label] for label in csv_column]

In [23]:
# Pull features (padded id arrays) and labels (one-item lists) out of the frame.
train_x = get_text(train_df["text"])
train_y = get_target(train_df["target"])

# Sanity check: exactly one label per sample.
assert len(train_y) == len(train_x)
print(len(train_x))
print(len(train_y))

7606
7606


## Can use this method to split data if needed

    def split_data(train_x, train_y):
    
    split_index = int(len(train_x) * 0.995)
    train_x, valid_x = train_x[:split_index], train_x[split_index:]
    train_y, valid_y = train_y[:split_index], train_y[split_index:]
    
    return train_x, train_y, valid_x, valid_y 

```  train_x, train_y, valid_x, valid_y = split_data(train_x, train_y) ``` 

In [24]:
# Features and labels must stay aligned (also after an optional split).
assert len(train_x) == len(train_y)
print(len(train_x))
#Uncomment these lines to create a dataloader for validation
# assert len(valid_x) == len(valid_y)
# print(len(valid_x))

7606


In [25]:
def create_dataloader(train_x, train_y, batch_size = 30):
    """Wrap features and labels in a shuffled DataLoader.

    Args:
        train_x: sequence of equal-length integer id arrays/lists, one per sample.
        train_y: sequence of labels, one per sample (e.g. ``[[1], [0], ...]``).
        batch_size: number of samples per batch.

    Returns:
        torch.utils.data.DataLoader yielding ``(features, labels)`` batches,
        with int64 features and float32 labels, reshuffled each epoch.
    """
    # Stack into a single ndarray first: building a tensor directly from a
    # Python list of ndarrays converts element by element and is very slow.
    features = torch.tensor(np.asarray(train_x), dtype = torch.long)
    labels = torch.tensor(np.asarray(train_y), dtype = torch.float32)

    train_dataset = TensorDataset(features, labels)

    return DataLoader(train_dataset, shuffle = True, batch_size = batch_size)


In [26]:
#Lets create a dataloader
train_loader = create_dataloader(train_x[:7595], train_y[:7595])

In [27]:
#Lets look at the batches of data
train_loader_iter = iter(train_loader)
next(train_loader_iter)[0].shape

torch.Size([30, 52])

## Now we are going to define the model class

In [28]:
class LSTM(nn.Module):
    """Embedding -> stacked LSTM -> linear -> sigmoid binary classifier.

    Args:
        vocab_size: number of rows in the embedding table; every token id fed
            to the model must be smaller than this value.
        embedding_dim: size of each token embedding.
        hidden_size: number of hidden units per LSTM layer.
        n_layers: number of stacked LSTM layers (dropout 0.2 between layers).
    """

    def __init__(self,vocab_size, embedding_dim, hidden_size, n_layers):
        super(LSTM, self).__init__()

        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm1 = nn.LSTM(embedding_dim, self.hidden_size, num_layers = self.n_layers, dropout = 0.2, batch_first = True)
        self.fc1 = nn.Linear(self.hidden_size, 1)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x, hidden):
        """Score a batch of id sequences.

        Args:
            x: integer tensor of shape (batch, sequence_length).
            hidden: accepted for interface compatibility only. It is NOT
                passed to the LSTM, so every call starts from a zero state.

        Returns:
            (hidden, output): the LSTM's final (h_n, c_n) state and a
            (batch, 1) tensor of sigmoid outputs for the last timestep.
        """
        embeddings = self.embedding(x)
        lstm_out, hidden = self.lstm1(embeddings)

        # Only the last timestep is used for the prediction, so apply the
        # linear layer and sigmoid to that slice alone instead of scoring
        # every timestep and discarding all but one.
        last_step = lstm_out[:, -1, :]
        output = self.sigmoid(self.fc1(last_step))

        return hidden, output

    def init_hidden(self, batch_size):
        """Return a zero (h_0, c_0) pair of shape (n_layers, batch_size, hidden_size).

        The tensors take the dtype and device of the model's own parameters,
        so they follow the model wherever it has been moved.
        """
        weight = next(self.parameters()).detach()
        state_shape = (self.n_layers, batch_size, self.hidden_size)

        hidden = (weight.new_zeros(state_shape),
                  weight.new_zeros(state_shape))

        return hidden
        
        
        

## Now we have to define the train function
* forward and backprop function
* The final train function 

In [29]:
def forward_and_backprop(rnn, optimizer, tweet, target, criterion, hidden):
    """Run one optimisation step on a single batch.

    Args:
        rnn: model called as ``rnn(tweet, hidden)`` and returning ``(hidden, output)``.
        optimizer: optimizer whose gradients are zeroed and which is stepped here.
        tweet: input batch passed to the model.
        target: labels passed to ``criterion`` together with the model output.
        criterion: loss function called as ``criterion(output, target)``.
        hidden: iterable of hidden-state tensors.

    Returns:
        (loss_for_batch, hidden): the batch loss as a Python number and the
        hidden state returned by the model.
    """
    
    # Moves the model to the GPU on every call when the global flag is set.
    if train_on_gpu:
        rnn.cuda()
    
    # Take `.data` of each state tensor so gradients do not flow into earlier batches.
    hidden = ([each.data for each in hidden])
    optimizer.zero_grad()
    
    hidden, output = rnn(tweet, hidden)
    loss = criterion(output, target)
    
    loss.backward()
    
    # Clip the gradient norm to 5 before the parameter update.
    nn.utils.clip_grad_norm_(rnn.parameters(), 5)
    optimizer.step()
    
    loss_for_batch = loss.item()
    
    return loss_for_batch, hidden
    
    
    

In [30]:
def train(rnn, epochs, optimizer, criterion, batch_size, train_loader):
    """Train `rnn` for `epochs` passes over `train_loader`.

    After every epoch the epoch number and the SUM of the per-batch losses
    (not their mean) are printed.

    Args:
        rnn: model exposing ``train()``, ``cuda()`` and ``init_hidden(batch_size)``.
        epochs: number of passes over the data.
        optimizer: optimizer forwarded to ``forward_and_backprop``.
        criterion: loss function forwarded to ``forward_and_backprop``.
        batch_size: batch size used to build the initial hidden state and to
            compute the batch cut-off.
        train_loader: iterable of ``(tweet, target)`` batches with a ``dataset``
            attribute that supports ``len()``.
    """

    rnn.train()
    if train_on_gpu:
        rnn = rnn.cuda()

    # Loop-invariant, so computed once instead of on every batch.
    n_batches = len(train_loader.dataset)//batch_size

    for epoch in range(1, epochs + 1):

        hidden = rnn.init_hidden(batch_size)
        train_loss = 0

        for batch_i, (tweet, target) in enumerate(train_loader):

            # Safety cut-off kept from the original loop.
            if(batch_i > n_batches):
                break

            if train_on_gpu:
                tweet = tweet.cuda()
                target = target.cuda()

            # The returned hidden state is intentionally discarded: every
            # batch is started from the same initial state.
            batch_loss, _ = forward_and_backprop(rnn, optimizer, tweet, target, criterion, hidden)
            train_loss += batch_loss

        print("Epoch Number: ", str(epoch))
        print("Train Loss: ", str(train_loss))
        
    
    
        
    
    

## Now Let us define the hyperparameters

In [31]:
# Training schedule.
epochs = 20
batch_size = 30
lr = 0.001

# Token ids run from 1 to len(vocab2int); 0 is used for padding and unknown
# tokens. The embedding table therefore needs len(vocab2int) + 1 rows,
# otherwise the largest id is out of range for nn.Embedding.
vocab_size = len(vocab2int) + 1
embedding_dim = 230
hidden_size = 250
num_layers = 2

rnn = LSTM(vocab_size, embedding_dim, hidden_size, num_layers) 

optimizer = optim.Adam(rnn.parameters(), lr = lr)
# Binary cross-entropy on probabilities (the model output is passed through a sigmoid).
criterion = nn.BCELoss()

In [32]:
# Run training; each epoch prints the summed batch loss.
train(rnn, epochs, optimizer, criterion, batch_size, train_loader)

Epoch Number:  1
Train Loss:  142.83650508522987
Epoch Number:  2
Train Loss:  101.51655912399292
Epoch Number:  3
Train Loss:  68.53155351430178
Epoch Number:  4
Train Loss:  38.91419133543968
Epoch Number:  5
Train Loss:  23.600512051023543
Epoch Number:  6
Train Loss:  19.399995845509693
Epoch Number:  7
Train Loss:  14.735474395100027
Epoch Number:  8
Train Loss:  15.630562658014242
Epoch Number:  9
Train Loss:  11.853240346419625
Epoch Number:  10
Train Loss:  11.097251099796267
Epoch Number:  11
Train Loss:  11.177550852102286
Epoch Number:  12
Train Loss:  10.18252025959373
Epoch Number:  13
Train Loss:  7.811310794848396
Epoch Number:  14
Train Loss:  8.59104484825366
Epoch Number:  15
Train Loss:  7.5842379529094615
Epoch Number:  16
Train Loss:  7.079321550174427
Epoch Number:  17
Train Loss:  8.68538865122082
Epoch Number:  18
Train Loss:  11.517663090286078
Epoch Number:  19
Train Loss:  9.071310374762106
Epoch Number:  20
Train Loss:  8.210935624083504


## Now we are going to define the predict function

In [33]:
def predict(net, test_review):
    """Classify one raw tweet and print the result.

    The text is preprocessed, encoded and padded in the same way as the
    training data, scored by `net`, and the raw model output plus a
    human-readable verdict are printed. Nothing is returned.

    Args:
        net: trained model called as ``net(batch, hidden)`` and returning
            ``(hidden, output)``; must provide ``eval()`` and ``init_hidden()``.
        test_review: non-empty raw tweet string.

    Raises:
        AssertionError: if `test_review` is empty, or if no tokens remain
            after preprocessing (e.g. the text holds only stopwords or URLs).
    """

    assert len(test_review) > 0

    net.eval()
    preprocessed_data = tokenize_text(preprocess_text(test_review))

    # Check the PREPROCESSED tokens here; the original repeated the raw-text
    # check, which could never fail at this point.
    assert len(preprocessed_data) > 0, "no tokens left after preprocessing"

    padded_data = pad_text(preprocessed_data)
    padded_data = padded_data.reshape(1, -1)
    # Embedding lookups need integer ids; make the dtype explicit.
    padded_data = torch.from_numpy(padded_data).long()

    batch_size = padded_data.size(0)
    hidden = net.init_hidden(batch_size)

    if train_on_gpu:
        padded_data = padded_data.cuda()

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        hidden, output = net(padded_data, hidden)

    print("Unrounded Answer: ", output)

    answer = np.round(output.cpu().numpy())
    if answer.item() == 1:
        print("Call in immediate emergency at location")
    else:
        print("General Commentary")
    
    

# Hand-written tweets, scored one after another in the listed order.
sample_tweets = [
    "Breaking News: Flooding on streets",
    "Fire ravaged houses next to me and are approaching me",
    "We the best music. We just chillin #Ballin",
    "Smoke in the air. It smells like smoke #Fire",
    "Smoke in the air. It smells like smoke #SaySikeRightNow",
    "I hear strange noises. The wall is shaking",
    "I am on fire with playing this game #TheGOAT",
    "High winds very high winds the ground is shaking",
    "I dont do domestic violence",
    "The houses next to us have burned to pure ash",
    "Breaking News: High Water levels threatening Silicon Valley",
    "Oh no what is happening. Flooding is affected my House",
    "Oh no what is happening. Flooding is affected my House",
    "there is a forest fire at spot pond, geese are fleeing across the street, I cannot save them all",
    "This fire is huge. How I am I supposed to put this out myself bruh",
    "I can a broken car set ablaze on the side of street calling 911",
]
for sample_tweet in sample_tweets:
    predict(rnn, sample_tweet)

    

Unrounded Answer:  tensor([[1.0000]], device='cuda:0', grad_fn=<SelectBackward>)
Call in immediate emergency at location
Unrounded Answer:  tensor([[0.0006]], device='cuda:0', grad_fn=<SelectBackward>)
General Commentary
Unrounded Answer:  tensor([[4.2263e-05]], device='cuda:0', grad_fn=<SelectBackward>)
General Commentary
Unrounded Answer:  tensor([[0.9999]], device='cuda:0', grad_fn=<SelectBackward>)
Call in immediate emergency at location
Unrounded Answer:  tensor([[0.0004]], device='cuda:0', grad_fn=<SelectBackward>)
General Commentary
Unrounded Answer:  tensor([[0.0917]], device='cuda:0', grad_fn=<SelectBackward>)
General Commentary
Unrounded Answer:  tensor([[0.0009]], device='cuda:0', grad_fn=<SelectBackward>)
General Commentary
Unrounded Answer:  tensor([[1.0000]], device='cuda:0', grad_fn=<SelectBackward>)
Call in immediate emergency at location
Unrounded Answer:  tensor([[0.0006]], device='cuda:0', grad_fn=<SelectBackward>)
General Commentary
Unrounded Answer:  tensor([[1.000

## Let's test on some test data

In [39]:
# Tweets scored one after another in the listed order.
test_tweets = [
    "ACCIDENT - HIT AND RUN - COLD at 500 BLOCK OF SE VISTA TER GRESHAM OR [Gresham Police #PG15000044357...",
    "@DaveOshry @Soembie So if I say that I met her by accident this week- would you be super jelly Dave?...",
    "We're shaking...It's an earthquake",
    "We are still living in the aftershock of Hiroshima people are still the scars of history.' - Edward...",
    "320 [IR] ICEMOON [AFTERSHOCK] | http://t.co/THyzOMVWU0 | @djicemoon | #Dubstep #TrapMusic #DnB #EDM ...",
    "#UPDATE: Picture from the Penn Twp. airplane accident. http://t.co/6JfgDnZRlC",
    "@thugIauren I had myself on airplane mode by accident ??",
    "Typhoon Soudelor kills 28 in China and Taiwan",
    "No I don't like cold!",
    "Not a diss song. People will take 1 thing and run with it. Smh it's an eye opener though. He is abou...",
    "Just got to love burning your self on a damn curling wand... I swear someone needs to take it away f...",
    "I hate badging shit in accident",
    "Horrible Accident Man Died In Wings of AirplaneåÊ(29-07-2015) http://t.co/5ZRKZdhODe",
]
for test_tweet in test_tweets:
    predict(rnn, test_tweet)



Unrounded Answer:  tensor([[0.3599]], device='cuda:0', grad_fn=<SelectBackward>)
General Commentary
Unrounded Answer:  tensor([[0.9996]], device='cuda:0', grad_fn=<SelectBackward>)
Call in immediate emergency at location
Unrounded Answer:  tensor([[0.9974]], device='cuda:0', grad_fn=<SelectBackward>)
Call in immediate emergency at location
Unrounded Answer:  tensor([[0.9999]], device='cuda:0', grad_fn=<SelectBackward>)
Call in immediate emergency at location
Unrounded Answer:  tensor([[7.0223e-05]], device='cuda:0', grad_fn=<SelectBackward>)
General Commentary
Unrounded Answer:  tensor([[1.0000]], device='cuda:0', grad_fn=<SelectBackward>)
Call in immediate emergency at location
Unrounded Answer:  tensor([[0.9396]], device='cuda:0', grad_fn=<SelectBackward>)
Call in immediate emergency at location
Unrounded Answer:  tensor([[1.0000]], device='cuda:0', grad_fn=<SelectBackward>)
Call in immediate emergency at location
Unrounded Answer:  tensor([[0.0007]], device='cuda:0', grad_fn=<Select