# IR4QA using Mixture Model

In [1]:
% matplotlib inline

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import re

import time
import datetime

import contractions 
import string
from collections import Counter

# nltk imports
import nltk
nltk.download('stopwords')
nltk.download('punkt')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
import difflib

STOP_WORDS = set(stopwords.words('english')) 

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/uffaznathaniel/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/uffaznathaniel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Config

In [3]:
# Text-processing switches read by the vectorizer cells below.
CONFIG_REMOVE_STOP_WORDS = True
CONFIG_STEMMER           = SnowballStemmer('english') # Use None for no stemmer
CONFIG_MAX_FEATURES      = 3000 # None for max_features=size of vocab
CONFIG_NGRAM_RANGE       = (1, 1) # (3,3)

# The token helpers consult TOKEN_STEMMER / TOKEN_LEMMATIZER. Reuse the
# configured stemmer instead of building a second hard-coded one, so that
# setting CONFIG_STEMMER to None really switches stemming off.
TOKEN_STEMMER = CONFIG_STEMMER
TOKEN_LEMMATIZER = WordNetLemmatizer()

## Read the files

In [4]:
# Tab-separated file holding the question lines and their ranked answer lines
# (parsed by read_faq).
CONFIG_FAQ_FILEPATH = "./anon-qrels.txt"
# File with one "<question id> <category>" entry per line (parsed by read_faq).
CONFIG_FAQ_CATEGORY_FILEPATH = "./categories.txt"
# Similarity ratio that two strings must exceed for are_string_similar to
# report them as similar.
CONFIG_STRING_SIMILARITY = 0.85

def string_similary(a, b):
    """Return the case-insensitive similarity ratio of two strings.

    Both inputs are lower-cased and compared with ``difflib.SequenceMatcher``;
    the value returned is the matcher's ``ratio()``.

    Approach taken from https://stackoverflow.com/a/1471603
    """
    lowered_a, lowered_b = a.lower(), b.lower()
    matcher = difflib.SequenceMatcher(None, lowered_a, lowered_b)
    return matcher.ratio()

def are_string_similar(a, b):
    """Tell whether two strings are near-duplicates.

    Returns True when their ``string_similary`` ratio is strictly greater than
    CONFIG_STRING_SIMILARITY.
    """
    similarity = string_similary(a, b)
    return CONFIG_STRING_SIMILARITY < similarity

def read_faq():
    """Load the FAQ questions, their ranked answers and their categories.

    CONFIG_FAQ_FILEPATH is read line by line; fields are separated by tabs.
    A line starting with "question" (case-insensitive) defines a question:
    its second field is the question id, its third the Yahoo id, and the
    text after the Yahoo id is the question. Every other non-empty line is an
    answer: question id, Yahoo id, rank, and the answer text after them.

    CONFIG_FAQ_CATEGORY_FILEPATH is then read; each usable line is
    "<question id> <category>". A category is only accepted when it starts
    with a letter.

    Returns:
        A ``(faqs, categories)`` tuple. ``faqs`` maps question id to a dict
        with keys 'id', 'question', 'yahoo_id', 'answers' (list of
        ``(rank, answer)`` tuples sorted by rank, highest first) and
        'category' (None when no category was found). ``categories`` is the
        set of all category names assigned.

    Raises:
        KeyError: if an answer or category line refers to an unknown
            question id.
        IndexError: if a non-empty line has fewer fields than expected.
    """
    with open(CONFIG_FAQ_FILEPATH) as f:
        qrel_lines = [raw.strip() for raw in f]

    faqs = {}
    for line in qrel_lines:
        if not line:
            # A blank line has no fields; indexing them would raise IndexError.
            continue
        parts = re.split(r'\t+', line)
        if line.lower().startswith('question'):
            qid = parts[1]
            yahoo_id = parts[2]
            # maxsplit=1 keeps the whole question even if the Yahoo id
            # happens to occur again inside the text.
            question = line.split(yahoo_id, 1)[1].strip()
            faqs[qid] = {
                'id': qid,
                'question': question,
                'yahoo_id': yahoo_id,
                'answers': [],
                'category': None
            }
        else:
            qid = parts[0]
            yahoo_id = parts[1]
            rank = parts[2]
            # Same maxsplit reasoning as for questions.
            answer = line.split(yahoo_id + "\t" + rank, 1)[1].strip()
            # Duplicate answers exist in the file and are kept as they are.
            faqs[qid]['answers'].append((rank, answer))

    # Highest rank first. Ranks stay strings, so the ordering is
    # lexicographic (fine for single-digit ranks).
    for qid in faqs:
        faqs[qid]['answers'].sort(key=lambda tup: tup[0], reverse=True)

    # Determine the categories
    with open(CONFIG_FAQ_CATEGORY_FILEPATH) as f:
        category_lines = [raw.strip() for raw in f]

    categories = set()
    for line in category_lines:
        if not line:
            continue
        separator = line.find(' ')
        if separator < 0:
            # Line holds an id only, without a category.
            continue
        qid = line[:separator]
        category = line[separator:].strip()
        if category and category[0].isalpha():
            faqs[qid]['category'] = category
            categories.add(category)

    return faqs, categories

# read_faq returns (faqs, categories); unpack both in one step.
FAQS, CATEGORIES = read_faq()


#FAQS['5002']['answers']

## Categories

In [5]:
CATEGORIES

{'Air Travel',
 'Allergies',
 'Alternative Medicine',
 'Baseball',
 'Basketball',
 'Birds',
 'Bologna',
 'Books & Authors',
 'Boxing',
 'Cancer',
 'Cats',
 'Chicago',
 'China',
 'Cleaning & Laundry',
 'Cleveland',
 'Cricket',
 'Cycling',
 'Da Lat',
 'Da Nang',
 'Dancing',
 'Decorating & Remodeling',
 'Dental',
 'Denver',
 'Detroit',
 'Diabetes',
 'Diet & Fitness',
 'Do It Yourself (DIY)',
 'Dogs',
 'Drawing & Illustration',
 'Egypt',
 'Fantasy Sports',
 'Fashion & Accessories',
 'First Aid',
 'Fish',
 'Fishing',
 'Football (American)',
 'Garden & Landscape',
 'Genealogy',
 'Golf',
 'Guatemala',
 'Hair',
 'Heart Diseases',
 'History',
 'Houston',
 'Hunting',
 'Ice Skating',
 'Infectious Diseases',
 'Injuries',
 'Israel',
 'Japan',
 'London',
 'Los Angeles',
 'Maintenance & Repairs',
 'Makeup',
 'Martial Arts',
 "Men's Health",
 'Mental Health',
 'Montreal',
 'Netherlands',
 'New York City',
 'Optical',
 'Orlando',
 'Other - Beauty & Style',
 'Other - Canada',
 'Other - Diseases',
 'Othe

In [7]:
'Poetry' in CATEGORIES

True

## TREC 2015

In [6]:
# Reference date (Aug 31, 2015) as a datetime at midnight.
LIVE_QA_DATE = datetime.datetime.strptime('08/31/2015', "%m/%d/%Y")
# time.mktime treats the struct_time as local time, so the resulting Unix
# timestamp depends on the time zone of the machine running the notebook.
LIVE_QA_DATE_UNIX = time.mktime(LIVE_QA_DATE.timetuple())

LIVE_QA_DATE_UNIX

1441004400.0

## Yahoo! Answers Comprehensive Questions and Answers version 1.0 

#### Parser

In [9]:
from html.parser import HTMLParser

class MLStripper(HTMLParser):
    """HTML parser that drops all markup and collects only the text."""

    def __init__(self):
        # Let the base class set up its own parsing state.
        # convert_charrefs=True makes it decode character references such as
        # "&amp;" inside text before handing it to handle_data.
        super().__init__(convert_charrefs=True)
        self.fed = []  # text fragments, in document order

    def handle_data(self, d):
        """Store one text fragment reported by the parser."""
        self.fed.append(d)

    def get_data(self):
        """Return all collected text fragments joined into one string."""
        return ''.join(self.fed)


def strip_tags(html):
    """Return `html` with tags removed and character references decoded.

    Args:
        html: markup string; falsy values (None, '') yield ''.

    Returns:
        The plain text. If parsing fails (for example because `html` is not
        a string) the input is returned unchanged.
    """
    try:
        if not html:
            return ''
        s = MLStripper()
        s.feed(html)
        # feed() may hold back trailing text (e.g. after a bare "&") in case
        # more input follows; close() flushes it so nothing is lost.
        s.close()
        return s.get_data()
    except Exception:
        # Best effort: hand back the input rather than failing the caller.
        return html

In [10]:
strip_tags('Cooking &amp; Recipes')

'Cooking & Recipes'

In [12]:
FILE_PATH = './data/combined/FullOct2007.xml'

from xml.etree import ElementTree as ET

# Stream the XML so that records are handled one <vespaadd> at a time.
parser = ET.iterparse(FILE_PATH, events=("start","end"))

should_add = True
valid_category = False
o = { 'q': None, 'a': None}
parsed_results = []
for event, element in parser:
    if event == "start":
        # A new <vespaadd> record begins: reset the per-record state.
        if element.tag == 'vespaadd':
            o              = { 'q': None, 'a': None}
            should_add     = True
            valid_category = False
        continue

    # check dates that are less than Aug 31, 2015
    #if should_add and element.tag == 'date' and event == "end":
    #    if int(element.text) > LIVE_QA_DATE_UNIX:
    #        should_add = False

    # Only "end" events reach this point, so element.text is complete.
    tag = element.tag

    # Question
    if tag == 'subject':
        o['q'] = element.text
    elif tag == 'content':
        # An empty <content/> has text None, and a record may lack a subject;
        # plain string concatenation would raise TypeError in both cases.
        if element.text:
            o['q'] = (o['q'] + ' ' + element.text) if o['q'] else element.text

    # Answer
    elif tag == 'bestanswer':
        o['a'] = element.text

    # Categories
    elif tag in ('cat', 'maincat', 'subcat'):
        if strip_tags(element.text) in CATEGORIES:
            valid_category = True

    # Add the element
    elif tag == 'vespaadd':
        if should_add and valid_category:
            #o['q'] = strip_tags(o['q'])
            #o['a'] = strip_tags(o['a'])
            parsed_results.append(o)
        # The record's text has been copied into `o`; release its subtree so
        # memory does not grow with the size of the dump.
        element.clear()

len(parsed_results)

834843

In [9]:
# Unpickle
#infile = open(FILE_PATH + '.pickle','rb')
#parsed_results = pickle.load(infile)
#infile.close()

In [13]:
# Upper-case alias for the parsed question/answer records; the later cells
# read the records through this name.
PARSED_RESULTS = parsed_results

## Vectorizer

In [16]:
def tokenize(t):
    """Turn raw text into a list of normalised word tokens.

    The text is lower-cased, contractions are expanded, and the result is
    split into runs of word characters. Tokens found in STOP_WORDS are
    dropped; the remaining tokens are lemmatized when TOKEN_LEMMATIZER is set.
    No stemming is applied here.
    """
    expanded = contractions.fix(t.lower())  # fix contractions
    # Not handled yet: SMS slang and morphological differences, see
    # https://pdfs.semanticscholar.org/5988/ef005467f17fbd1d5dccc40f6541d8e9cd28.pdf
    words = nltk.tokenize.RegexpTokenizer(r'\w+').tokenize(expanded)
    kept = [word for word in words if word not in STOP_WORDS]
    if not TOKEN_LEMMATIZER:
        return kept
    return [TOKEN_LEMMATIZER.lemmatize(word) for word in kept]

def normalize_text(t):
    """Lower-case `t` and then expand its contractions."""
    return contractions.fix(t.lower())

def custom_process_word(w):
    """Normalise a single token produced by the vectorizer's analyzer.

    The token is stemmed when TOKEN_STEMMER is set and then lemmatized when
    TOKEN_LEMMATIZER is set; either step is skipped when its helper is None.

    Args:
        w: one token (string).

    Returns:
        The processed token.
    """
    # Stemming depends only on whether a stemmer is configured. It used to be
    # gated on CONFIG_REMOVE_STOP_WORDS as well, which is unrelated to
    # stemming and silently disabled it whenever stop words were kept.
    if TOKEN_STEMMER:
        w = TOKEN_STEMMER.stem(w)
    if TOKEN_LEMMATIZER:
        w = TOKEN_LEMMATIZER.lemmatize(w)
    return w


class CustomCountVectorizer(CountVectorizer):
    """CountVectorizer that passes every analyzed token through custom_process_word."""

    def build_analyzer(self):
        """Return the stock analyzer wrapped with per-token post-processing.

        See https://stackoverflow.com/a/41377484
        """
        base_analyzer = super().build_analyzer()

        def analyze(doc):
            return [custom_process_word(token) for token in base_analyzer(doc)]

        return analyze

In [9]:
def get_vectorizer():
    """Fit a CustomCountVectorizer on the normalised FAQ questions.

    Returns:
        ``(vectorizer, X)`` where ``X`` is the document-term matrix returned
        by ``fit_transform``, one row per entry of FAQS in iteration order.
    """
    corpus = [normalize_text(FAQS[qid]['question']) for qid in FAQS]

    # Read more: http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html
    vectorizer = CustomCountVectorizer(
        ngram_range=CONFIG_NGRAM_RANGE,
        stop_words='english',
        max_features=CONFIG_MAX_FEATURES,
    )
    return vectorizer, vectorizer.fit_transform(corpus)

# NOTE: a later cell defines get_vectorizer again (over PARSED_RESULTS) and
# overwrites both VECTORIZER and VECTORS.
VECTORIZER, faq_term_matrix = get_vectorizer()
VECTORS = faq_term_matrix.toarray()

In [17]:
def get_vectorizer():
    """Fit a CustomCountVectorizer on the normalised questions in PARSED_RESULTS.

    Returns:
        ``(vectorizer, X)`` where ``X`` is the document-term matrix returned
        by ``fit_transform``; row ``i`` belongs to ``PARSED_RESULTS[i]``.
    """
    corpus = []
    for o in PARSED_RESULTS:
        # A record's 'q' starts out as None and stays so when no subject was
        # parsed. Vectorize such records as empty text instead of crashing,
        # and never skip them, so matrix rows stay aligned with PARSED_RESULTS.
        corpus.append(normalize_text(o['q'] or ''))

    # Read more: http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html
    vectorizer = CustomCountVectorizer(max_features=CONFIG_MAX_FEATURES,
                                           stop_words='english',
                                           ngram_range=CONFIG_NGRAM_RANGE)
    X = vectorizer.fit_transform(corpus)
    return vectorizer, X

VECTORIZER, VECTORS = get_vectorizer()
# NOTE(review): the dense copy holds len(PARSED_RESULTS) x vocabulary-size
# cells and can be very large; it is kept because the later cells index
# VECTORS[i][j] directly.
VECTORS = VECTORS.toarray()

In [19]:
len(VECTORS)

834843

### Unigram statistical model

In [21]:
# Interpolation weight between the document model and the corpus model.
LAMBDA = 0.5

# Corpus statistics derived from the document-term matrix VECTORS
# (rows = documents, columns = vocabulary terms). Vectorised sums replace
# the former element-by-element Python loops, which visited every cell of
# the matrix one at a time; the results are the same float arrays.

# Total in corpus: occurrences of every term over all documents.
TOTAL_IN_CORPUS = np.asarray(VECTORS.sum(axis=0), dtype=float).ravel()

# Doc counts: number of in-vocabulary tokens in every document.
DOC_COUNTS = np.asarray(VECTORS.sum(axis=1), dtype=float).ravel()

# Number of in-vocabulary tokens in the whole corpus.
TOTAL_COUNT_V = np.sum(TOTAL_IN_CORPUS)

### Old

In [11]:
def get_counter():
    """Return a fresh dict that maps every question id in FAQS to 0."""
    return dict.fromkeys(FAQS, 0)

def get_qid_by_index(i):
    """Return the question id at position `i` in FAQS' iteration order.

    Returns -1 when no entry sits at that position.
    """
    for position, qid in enumerate(FAQS):
        if position == i:
            return qid
    return -1

def unigram_stats_model(text):
    """Score every document against `text` with a smoothed unigram model.

    For each vocabulary term t that occurs in the query, a document d
    contributes the factor

        LAMBDA * count(t, d) / DOC_COUNTS[d]
        + (1 - LAMBDA) * TOTAL_IN_CORPUS[t] / TOTAL_COUNT_V

    and the document's score is the product of these factors. Each distinct
    query term is counted once, whatever its frequency in the query.

    Args:
        text: the raw query string.

    Returns:
        A Counter keyed by question id (position i in DOC_COUNTS maps to the
        i-th key of FAQS, or -1 beyond its end). Every score is 0 when no
        query term is in the vocabulary.
    """
    text = normalize_text(text)
    X = VECTORIZER.transform([text]).toarray()[0]
    # The set of query terms is the same for every document: find it once.
    query_terms = [j for j, q in enumerate(X) if q > 0]
    # Positional lookup table; avoids rescanning FAQS for every document.
    qids = list(FAQS)
    counter = get_counter()
    for i, doc_count in enumerate(DOC_COUNTS):
        qid = qids[i] if i < len(qids) else -1
        if not query_terms:
            counter[qid] = 0
            continue
        product = 1
        for j in query_terms:
            td = VECTORS[i][j]
            tc = TOTAL_IN_CORPUS[j]
            # A document without in-vocabulary tokens has doc_count == 0;
            # 0/0 would turn the whole score into NaN, so its document
            # probability is taken as 0 and only the corpus part remains.
            doc_prob = td / doc_count if doc_count > 0 else 0.0
            product = product * ((LAMBDA * doc_prob) + ((1 - LAMBDA) * tc / TOTAL_COUNT_V))
        counter[qid] = product

    return Counter(counter)

t = unigram_stats_model("My mom died when I was 8 and I have an awful dad when it comes to health and body things. I never knew u needed to clean your belly button?	I recent")
t.most_common(10)



[('5002', 1.4790695173055661e-23),
 ('6004', 6.488639916528648e-42),
 ('5331', 2.9011358148731388e-42),
 ('5980', 2.7721232364793048e-42),
 ('5074', 1.335655068407521e-42),
 ('5829', 1.2785389767542065e-42),
 ('5490', 6.7685462338873521e-43),
 ('5222', 6.4597819641831791e-43),
 ('5837', 4.6970349037291821e-43),
 ('5277', 2.6360149976860792e-43)]

### New Unigram Model

In [24]:
from collections import defaultdict

# Credits https://stackoverflow.com/a/15428658
class MostCommon(object):
    """Keep track of the top-k key-value pairs.

    Attributes:
        top: Integer representing the top-k items to keep track of.
        store: Dictionary of the top-k items.
        min: The current minimum value of any top-k item (None while empty).
        min_set: Dictionary mapping a value to the set of keys that
            currently hold that value. It never contains empty sets.
    """
    def __init__(self, top):
        """Create a new MostCommon object to track key-value pairs.

        Args:
            top: Integer representing the top-k values to keep track of.
        """
        self.top = top
        self.store = dict()
        self.min = None
        self.min_set = defaultdict(set)

    def _update_existing(self, key, value):
        """Update an item that is already one of the top-k values."""
        old_value = self.store[key]
        # Only non-decreasing updates are supported.
        assert value >= old_value
        if value == old_value:
            return  # Nothing changes.
        bucket = self.min_set[old_value]
        bucket.remove(key)
        # Always drop an emptied bucket: a leftover empty set would later be
        # picked up by min(self.min_set) and break eviction.
        if not bucket:
            del self.min_set[old_value]
        self.min_set[value].add(key)
        self.store[key] = value
        if old_value == self.min:  # The minimum may have moved up.
            self.min = min(self.min_set.keys())

    def __contains__(self, key):
        """Boolean if the key is one of the top-k items."""
        return key in self.store

    def __setitem__(self, key, value):
        """Assign a value to a key.

        The item won't be stored if it is not greater than the minimum (and
        the store is already full). If the item is already in the store,
        the value will be updated along with the `min` if necessary.
        """
        # Store it if we aren't full yet.
        if len(self.store) < self.top:
            if key in self.store:  # We already have this item.
                self._update_existing(key, value)
            else:  # Brand new item.
                self.store[key] = value
                self.min_set[value].add(key)
                # Test for None first: comparing a value with None raises
                # TypeError in Python 3.
                if self.min is None or value < self.min:
                    self.min = value
        # We're full. The value must be greater than the minimum to be
        # added. (self.min is only None here when top <= 0: store nothing.)
        elif self.min is not None and value > self.min:
            if key in self.store:  # We already have this item.
                self._update_existing(key, value)
            else:  # Brand new item.
                # Make room by removing one of the current minimums.
                old = self.min_set[self.min].pop()
                del self.store[old]
                # Delete the set if there are no old minimums left.
                if not self.min_set[self.min]:
                    del self.min_set[self.min]
                # Add the new item.
                self.min_set[value].add(key)
                self.store[key] = value
                self.min = min(self.min_set.keys())

    def __repr__(self):
        if len(self.store) < 10:
            store = repr(self.store)
        else:
            # Summarise large stores instead of printing every entry.
            length = len(self.store)
            largest = max(self.store.values())
            store = '<len={length}, max={largest}>'.format(length=length,
                                                           largest=largest)
        return ('{self.__class__.__name__}(top={self.top}, min={self.min}, '
                'store={store})'.format(self=self, store=store))

In [31]:
def unigram_stats_model_new(text):
    """Score every document against `text` with a smoothed unigram model.

    For each vocabulary term t that occurs in the query, a document d
    contributes the factor

        LAMBDA * count(t, d) / DOC_COUNTS[d]
        + (1 - LAMBDA) * TOTAL_IN_CORPUS[t] / TOTAL_COUNT_V

    and the document's score is the product of these factors. Each distinct
    query term is counted once, whatever its frequency in the query.

    Args:
        text: the raw query string.

    Returns:
        A Counter keyed by document position (the row index into VECTORS and
        DOC_COUNTS). Every score is 0 when no query term is in the
        vocabulary.
    """
    text = normalize_text(text)
    X = VECTORIZER.transform([text]).toarray()[0]
    # The set of query terms is the same for every document: find it once.
    query_terms = [j for j, q in enumerate(X) if q > 0]
    # NOTE(review): get_counter2 is not defined in the visible part of this
    # notebook. Presumably it returns a dict-like score container; confirm.
    counter = get_counter2()
    for i, doc_count in enumerate(DOC_COUNTS):
        if not query_terms:
            counter[i] = 0
            continue
        product = 1
        for j in query_terms:
            td = VECTORS[i][j]
            tc = TOTAL_IN_CORPUS[j]
            # A document without in-vocabulary tokens has doc_count == 0;
            # 0/0 would turn the whole score into NaN, so its document
            # probability is taken as 0 and only the corpus part remains.
            doc_prob = td / doc_count if doc_count > 0 else 0.0
            product = product * ((LAMBDA * doc_prob) + ((1 - LAMBDA) * tc / TOTAL_COUNT_V))
        counter[i] = product

    return Counter(counter)

t = unigram_stats_model_new("My mom died when I was 8 and I have an awful dad when it comes to health and body things. I never knew u needed to clean your belly button?	I recent")
t.most_common(10)

  


[(779404, 6.9959995044090663e-38),
 (786133, 3.106743169070274e-38),
 (556282, 1.6215100731268191e-38),
 (636219, 7.4305363584014381e-39),
 (188444, 3.796933768823638e-39),
 (178135, 2.2335225489007395e-39),
 (222050, 1.6742762843689603e-39),
 (781251, 1.6250045183074381e-39),
 (606906, 1.1163441336002996e-39),
 (131210, 9.3051035660936292e-40)]

In [36]:
PARSED_RESULTS[131210]

{'a': "Definitely clean it- try this... wiggle your finger around in your belly button for a few seconds then smell it. If you're a button cleaner it won't smell, but if you're not then you've just smelt summat rancid yeah? When you're in the shower clean it with a facecloth, simple!",
 'q': 'do we need to clean our belly buttons? and if we do, how? i know, kinda gross right? but just wonderin.'}

In [42]:
# For every FAQ question, keep the 50 best-scoring documents as
# (question id, [(document position, score), ...]).
ALL_TOP_50_RESULTS = []
total_questions = len(FAQS)
for ii, qid in enumerate(FAQS, start=1):
    question = FAQS[qid]['question']
    t = unigram_stats_model_new(question)
    top50 = t.most_common(50)
    ALL_TOP_50_RESULTS.append((qid, top50))
    # The message was missing the space before "out of".
    print('Processed ' + str(ii) + ' out of ' + str(total_questions))


  


Processed 1out of 1015
Processed 2out of 1015
Processed 3out of 1015
Processed 4out of 1015
Processed 5out of 1015
Processed 6out of 1015
Processed 7out of 1015
Processed 8out of 1015
Processed 9out of 1015
Processed 10out of 1015
Processed 11out of 1015


KeyboardInterrupt: 

In [44]:
ALL_TOP_50_RESULTS

[('5001',
  [(87064, 1.4889475414717762e-146),
   (736820, 1.0581363558508021e-148),
   (719120, 9.5921165305453891e-149),
   (1232, 4.3472570248359668e-150),
   (714915, 4.021131373884718e-150),
   (643610, 3.6968471514895852e-150),
   (556098, 3.5589920982695519e-150),
   (633666, 1.7725253792776714e-150),
   (639956, 1.1971521223975428e-150),
   (691094, 2.8478460917347984e-151),
   (608253, 2.4420242121967479e-151),
   (183666, 1.8056762026002409e-151),
   (109244, 1.3968436388909872e-151),
   (676088, 8.4509059067682671e-152),
   (670666, 6.9336996727622258e-152),
   (367890, 5.9178942304808119e-152),
   (615776, 4.0533349169151859e-152),
   (170184, 3.883154391484233e-152),
   (151500, 3.6591960616860285e-152),
   (456107, 3.6016748033493374e-152),
   (745930, 3.5904157076387778e-152),
   (566381, 3.5875500284201433e-152),
   (515372, 3.5232253489098993e-152),
   (558121, 3.2196985450781329e-152),
   (328658, 2.5558612079910782e-152),
   (657790, 2.4699702119239436e-152),
   (393

In [45]:
FAQS['5001']

{'answers': [('4', "Don't seek help here...Go and see a doctor immediately!"),
  ('4',
   'Go see a doctor. This is a sign of something more serious. You have nerve damage happening on your left side for whatever reason ~ see a doctor who can fix it ASAP. Before it gets worse or there is permanent damage. I had a tenseness on my left side in my arms and legs ~ 5 years later I was diagnosed with rumatoid arthritis.'),
  ('4',
   'Go see a doctor. This is a sign of something more serious. You have nerve damage happening on your left side for whatever reason ~ see a doctor who can fix it ASAP. Before it gets worse or there is permanent damage./n/nI had a tenseness on my left side in my arms and legs ~ 5 years later I was diagnosed with rumatoid arthritis.'),
  ('4', "Don't seek help here...Go and see a doctor immediately!"),
  ('3',
   '| Did you find this post helpful? You marked this post as helpful! I changed my mind Hozzer8 replied November 24th, 2013 I started the same symptoms in 20

In [46]:
PARSED_RESULTS[87064]

{'a': 'Hiatus Hernia  See a doctor.\n\nhttp://www.hernia.org/manjava.html',
 'q': 'i have pain under my right breast that feels like a pulled muscle that comes and goes. I can see it buldging? when this pain occurs i can see it buldging and i can place my hand there and feel it as well. it lasts for about 40 seconds to a minute. It often happens when i am in the tub and sitting up. when i lay down and push down on the buldging muscle it goes away. Why is this happening'}

In [108]:
# Print the question of every hit retrieved for the second FAQ entry, then
# print three selected hits again.
second_query_hits = ALL_TOP_50_RESULTS[1][1]
for a_pair in second_query_hits:
    print(PARSED_RESULTS[a_pair[0]]['q'])
    print('--------')

for rank in (10, 0, 9):
    print("##############")
    a_pair = second_query_hits[rank]
    print(PARSED_RESULTS[a_pair[0]]['q'])

Weird! Puss coming out of my bellybutton....is this normal? So just recently, I started noticing that I had like dried puss or something around my belly button. I took a Q-tip and cleaned it out and by the end of the night it was there again. It's really disgusting and it smells really bad. I have my belly button pierced but I don't think that is the cause. It seems to be coming from inside the belly button. What could be the cause of this and is it serious???
--------
my finace's belly button is pretty deep how deep should they be? are there any health issues that come  with a deep navel
--------
How do you clean ur belly button?So it wont look brown and dirty?
--------
I am looking for a doctor to cleans me of candida? I took the health check-up self test in the book '14-Day Health Cleansing' and the 'Do You Have CRC in the book Candida Related Complex.  After reading this information, I knew my body needed some detoxification. Can you help me find a doctor who will do this for me?
-

In [107]:
# Print the answer of every hit retrieved for the second FAQ entry, then
# print three selected hits again.
second_query_hits = ALL_TOP_50_RESULTS[1][1]
for a_pair in second_query_hits:
    print(PARSED_RESULTS[a_pair[0]]['a'])
    print('--------')

for rank in (33, 40, 10):
    print("##############")
    a_pair = second_query_hits[rank]
    print(PARSED_RESULTS[a_pair[0]]['a'])

Obviously you have an infection.  I would be VERY surprised if it didn't have something to do with your piercing.  But you should go to the doctor and have them check it.  Whatever is going on, you need at least an antibiotic...possibly pills AND ointment.  Depends on what is infected and how badly.
--------
deep navel??? is he fat or too thin ? may b itz just natural or i think u should consult a doctor and ask..coz if it is then u know the sooner you know bout the problem the better...best of luck!
--------
A good way is to wet a cotton swab and use that to clear out dirt.  Don't press to hard with it or it will hurt though......just enough to get the gunk out.
--------
Dear friend,
I am not a doctor but have successfully curbed the candida infections with alternative Kefer probotic culture yogurt made fresh a home in Soya milk no gm's increasing the growth of friendly bacteria which eats the unfriendly sister bacterial that grown the candida yeast growth, also drink plenty of filter

### Second pass: Relevance measure Varanasi approach

In [66]:
WORD = re.compile(r'\w+')

def get_cosine(vec1, vec2):
    """Return the cosine similarity of two sparse term-count vectors.

    Args:
        vec1: mapping from term to count (e.g. a Counter).
        vec2: mapping from term to count.

    Returns:
        The dot product over the shared terms divided by the product of the
        two Euclidean norms, as a float; 0.0 when that product is zero (for
        instance when either vector is empty).
    """
    # Local import: the notebook's import cell does not bring in `math`, so
    # the function failed with NameError on a fresh kernel.
    import math

    shared_terms = set(vec1.keys()) & set(vec2.keys())
    numerator = sum(vec1[term] * vec2[term] for term in shared_terms)

    norm1 = math.sqrt(sum(count ** 2 for count in vec1.values()))
    norm2 = math.sqrt(sum(count ** 2 for count in vec2.values()))
    denominator = norm1 * norm2

    if not denominator:
        return 0.0
    return float(numerator) / denominator

def text_to_vector(text):
    """Count the word tokens of `text`.

    Tokens are the matches of the module-level WORD pattern; the result is a
    Counter mapping each token to its number of occurrences.
    """
    term_counts = Counter()
    term_counts.update(WORD.findall(text))
    return term_counts

text1 = 'This is a foo bar sentence .'
text2 = 'This sentence is similar to a foo bar sentence .'

vector1 = text_to_vector(text1)
vector2 = text_to_vector(text2)

get_cosine(vector1, vector2)

0.8616404368553293

In [152]:
# Second pass for one query: compare the FAQ question with the question of
# each retrieved document using cosine similarity over raw word counts.
POS = 9
QID, Q_RESULTS = ALL_TOP_50_RESULTS[POS]
q_vec = text_to_vector(FAQS[QID]['question'])
ii = 0
for a_pair in Q_RESULTS:
    original_score = a_pair[1]  # first-pass unigram score (not used below)
    q_v2 = text_to_vector(PARSED_RESULTS[a_pair[0]]['q'])
    score = get_cosine(q_vec, q_v2)
    print("{}. New score: {}".format(ii, score))
    print('--------')
    ii += 1

0. New score: 0.22435206026247326
--------
1. New score: 0.1538643637241659
--------
2. New score: 0.26111648393354675
--------
3. New score: 0.211950261485958
--------
4. New score: 0.11677484162422842
--------
5. New score: 0.21885688981825285
--------
6. New score: 0.21885688981825285
--------
7. New score: 0.17195898485719865
--------
8. New score: 0.38380515539319854
--------
9. New score: 0.11313893217116627
--------
10. New score: 0.16972865106406024
--------
11. New score: 0.2161767426939856
--------
12. New score: 0.2339548000893516
--------
13. New score: 0.24618298195866545
--------
14. New score: 0.18045622869521083
--------
15. New score: 0.1846372364689991
--------
16. New score: 0.32828533472737925
--------
17. New score: 0.19894589252079753
--------
18. New score: 0.29008478786241815
--------
19. New score: 0.1846372364689991
--------
20. New score: 0.19677535089674902
--------
21. New score: 0.302407368379503
--------
22. New score: 0.3046358979224712
--------
23. New 

In [82]:
len(FAQS)

1015

In [153]:
# Show question and answer of a single retrieved document for one query.
POS = 9
INDEX = 8
a_pair = ALL_TOP_50_RESULTS[POS][1][INDEX]
hit_record = PARSED_RESULTS[a_pair[0]]
print(hit_record['q'])
print("####")
print(hit_record['a'])

Length or diameter? this is a serious question and i want some good anwers. all the guys are worrying about making there penises longer. they'll say stuff like its only 6 inch they want it to be longer . but from what i know a woman has 90% of the nerves or whatever it is in the first 3 inches of her vagina. so the question is wouldn't diameter matter more than length? if not then please explan it to me.
####
To secksy chic - go check an anatomy diagram.  The G spot is NOT six inches in, it is easily reached with your thumb, so it's only about 3 inches in at the front.

As for length versus girth, personally I prefer girth, as a long penis, anything longer than 6 inches, goes in too far and is painful.  But too much girth is not good either.  I have had both and I much prefer an average size penis attached to a guy who knows how to use it.

I think the idea of a really big penis is only appealing to look at and to compete with other guys.  Functionally it works just the same.

So don't