diff --git a/.gitignore b/.gitignore index cea410c7..1be68af1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# VSCode config +.vscode + # JupyterLab temp files .virtual_documents diff --git a/config/dev_reqs.txt b/config/dev_reqs.txt index b1ff8eaa..500793df 100644 --- a/config/dev_reqs.txt +++ b/config/dev_reqs.txt @@ -7,10 +7,11 @@ pytest pyyaml transformers>=3.0.0 # SpaCy models aren't stable across point releases -spacy==3.0.5 +spacy==3.6.0 ipywidgets ibm-watson twine +hypothesis # Documentation-related requirements have moved to non_36_reqs.txt. #sphinx diff --git a/config/non_36_reqs.txt b/config/non_36_reqs.txt index f6877e06..98e53e16 100644 --- a/config/non_36_reqs.txt +++ b/config/non_36_reqs.txt @@ -4,8 +4,14 @@ # This list will probably grow as libraries drop support. nltk -ray[default] -feather +ray[default] >= 2.0 + +# *** HACK ALERT *** +# Feather depends on Numpy being exactly 1.20.2, which breaks Pandas. +# So we don't include a dependency on it and hope for the best. 
+#feather +# *** END HACK *** + sphinx sphinxcontrib-apidoc diff --git a/notebooks/.gitignore b/notebooks/.gitignore new file mode 100644 index 00000000..217a2208 --- /dev/null +++ b/notebooks/.gitignore @@ -0,0 +1,2 @@ +CoNLL_u_test_inputs + diff --git a/notebooks/Analyze_Model_Outputs.ipynb b/notebooks/Analyze_Model_Outputs.ipynb index aaa1513f..102efeac 100644 --- a/notebooks/Analyze_Model_Outputs.ipynb +++ b/notebooks/Analyze_Model_Outputs.ipynb @@ -30,7 +30,9 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import os\n", @@ -72,7 +74,9 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "data": { @@ -323,7 +327,9 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "data": { @@ -504,7 +510,9 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "data": { @@ -666,7 +674,9 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "data": { @@ -4584,12 +4594,12 @@ } ], "source": [ - "from IPython.core.display import display, HTML\n", - "display(HTML(\"

PER entities in corpus for document 75:

\"))\n", - "display(corpus_person[75])\n", + "from IPython import display\n", + "display.display(display.HTML(\"

PER entities in corpus for document 75:

\"))\n", + "display.display(corpus_person[75])\n", "\n", - "display(HTML(\"

PER entities in model outputs for document 75:

\"))\n", - "display(bender_person[75])" + "display.display(display.HTML(\"

PER entities in model outputs for document 75:

\"))\n", + "display.display(bender_person[75])" ] }, { @@ -5382,7 +5392,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.8.17" } }, "nbformat": 4, diff --git a/notebooks/Analyze_Text.ipynb b/notebooks/Analyze_Text.ipynb index b0509c55..837e9816 100644 --- a/notebooks/Analyze_Text.ipynb +++ b/notebooks/Analyze_Text.ipynb @@ -143,7 +143,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -194,11 +194,11 @@ } ], "source": [ - "from IPython.core.display import display, HTML\n", + "from IPython.display import display, HTML\n", "doc_file = \"../resources/holy_grail_short.txt\"\n", "with open(doc_file, \"r\") as f:\n", " doc_text = f.read()\n", - " \n", + "\n", "display(HTML(f\"Document Text:
{doc_text}
\"))" ] }, @@ -376,9 +376,9 @@ " 'lemma': 'throughout'},\n", " {'text': 'Britain', 'part_of_speech': 'PROPN', 'location': [64, 71]},\n", " {'text': 'searching',\n", - " 'part_of_speech': 'VERB',\n", + " 'part_of_speech': 'NOUN',\n", " 'location': [72, 81],\n", - " 'lemma': 'search'},\n", + " 'lemma': 'searching'},\n", " {'text': 'for',\n", " 'part_of_speech': 'ADP',\n", " 'location': [82, 85],\n", @@ -412,13 +412,13 @@ " 'location': [113, 116],\n", " 'lemma': 'the'},\n", " {'text': 'Round',\n", - " 'part_of_speech': 'PROPN',\n", + " 'part_of_speech': 'ADJ',\n", " 'location': [117, 122],\n", - " 'lemma': 'Round'},\n", + " 'lemma': 'round'},\n", " {'text': 'Table',\n", - " 'part_of_speech': 'PROPN',\n", + " 'part_of_speech': 'NOUN',\n", " 'location': [123, 128],\n", - " 'lemma': 'Table'},\n", + " 'lemma': 'table'},\n", " {'text': '.', 'part_of_speech': 'PUNCT', 'location': [128, 129]},\n", " {'text': 'Along',\n", " 'part_of_speech': 'ADP',\n", @@ -492,10 +492,7 @@ " 'part_of_speech': 'DET',\n", " 'location': [236, 239],\n", " 'lemma': 'the'},\n", - " {'text': 'Not',\n", - " 'part_of_speech': 'ADV',\n", - " 'location': [240, 243],\n", - " 'lemma': 'not'},\n", + " {'text': 'Not', 'part_of_speech': 'PROPN', 'location': [240, 243]},\n", " {'text': '-', 'part_of_speech': 'PUNCT', 'location': [243, 244]},\n", " {'text': 'Quite', 'part_of_speech': 'PROPN', 'location': [244, 249]},\n", " {'text': '-', 'part_of_speech': 'PUNCT', 'location': [249, 250]},\n", @@ -505,9 +502,9 @@ " 'lemma': 'so'},\n", " {'text': '-', 'part_of_speech': 'PUNCT', 'location': [252, 253]},\n", " {'text': 'Brave',\n", - " 'part_of_speech': 'ADJ',\n", + " 'part_of_speech': 'PROPN',\n", " 'location': [253, 258],\n", - " 'lemma': 'brave'},\n", + " 'lemma': 'Brave'},\n", " {'text': '-', 'part_of_speech': 'PUNCT', 'location': [258, 259]},\n", " {'text': 'as',\n", " 'part_of_speech': 'ADP',\n", @@ -698,10 +695,7 @@ " 'part_of_speech': 'VERB',\n", " 'location': [521, 525],\n", " 'lemma': 'turn'},\n", - " 
{'text': 'away',\n", - " 'part_of_speech': 'ADV',\n", - " 'location': [526, 530],\n", - " 'lemma': 'away'},\n", + " {'text': 'away', 'part_of_speech': 'ADP', 'location': [526, 530]},\n", " {'text': ',', 'part_of_speech': 'PUNCT', 'location': [530, 531]},\n", " {'text': 'God',\n", " 'part_of_speech': 'PROPN',\n", @@ -4726,7 +4720,7 @@ " [362, 368): 'Arthur'\n", " 0.996876\n", " positive\n", - " 0.721918\n", + " 0.721919\n", " 0.311653\n", " 2\n", " 0.999918\n", @@ -4741,7 +4735,7 @@ " [587, 593): 'Arthur'\n", " 0.973795\n", " positive\n", - " 0.721918\n", + " 0.721919\n", " 0.311653\n", " 2\n", " 0.999918\n", @@ -4759,8 +4753,8 @@ "1 Person Arthur [587, 593): 'Arthur' 0.973795 positive \n", "\n", " sentiment.score relevance count confidence_entity \\\n", - "0 0.721918 0.311653 2 0.999918 \n", - "1 0.721918 0.311653 2 0.999918 \n", + "0 0.721919 0.311653 2 0.999918 \n", + "1 0.721919 0.311653 2 0.999918 \n", "\n", " disambiguation.subtype disambiguation.name disambiguation.dbpedia_resource \n", "0 None None None \n", @@ -4827,12 +4821,12 @@ " Sir Bedevere\n", " positive\n", " 0.835873\n", - " 0.897263\n", - " 0.046902\n", - " 0.810654\n", - " 0.016340\n", - " 0.095661\n", - " 0.021033\n", + " 0.884359\n", + " 0.031301\n", + " 0.496318\n", + " 0.135650\n", + " 0.015545\n", + " 0.022961\n", " 1\n", " \n", " \n", @@ -4840,12 +4834,12 @@ " King Arthur\n", " neutral\n", " 0.000000\n", - " 0.852288\n", - " 0.062558\n", - " 0.620066\n", - " 0.054894\n", - " 0.088147\n", - " 0.182329\n", + " 0.850874\n", + " 0.441230\n", + " 0.330559\n", + " 0.043714\n", + " 0.020016\n", + " 0.025905\n", " 1\n", " \n", " \n", @@ -4853,38 +4847,38 @@ " Sir Lancelot\n", " positive\n", " 0.835873\n", - " 0.830106\n", - " 0.046902\n", - " 0.810654\n", - " 0.016340\n", - " 0.095661\n", - " 0.021033\n", + " 0.823645\n", + " 0.031301\n", + " 0.496318\n", + " 0.135650\n", + " 0.015545\n", + " 0.022961\n", " 1\n", " \n", " \n", " 3\n", " image of W. G. 
Grace\n", " positive\n", - " 0.721918\n", - " 0.736080\n", - " 0.047242\n", - " 0.614332\n", - " 0.159497\n", - " 0.040378\n", - " 0.155298\n", + " 0.721919\n", + " 0.722026\n", + " 0.044130\n", + " 0.901205\n", + " 0.039773\n", + " 0.012838\n", + " 0.027599\n", " 1\n", " \n", " \n", " 4\n", - " Sir Galahad\n", - " positive\n", - " 0.835873\n", - " 0.638135\n", - " 0.046902\n", - " 0.810654\n", - " 0.016340\n", - " 0.095661\n", - " 0.021033\n", + " musical number\n", + " neutral\n", + " 0.000000\n", + " 0.621432\n", + " 0.312246\n", + " 0.174343\n", + " 0.032726\n", + " 0.077707\n", + " 0.045592\n", " 1\n", " \n", " \n", @@ -4893,18 +4887,18 @@ ], "text/plain": [ " text sentiment.label sentiment.score relevance \\\n", - "0 Sir Bedevere positive 0.835873 0.897263 \n", - "1 King Arthur neutral 0.000000 0.852288 \n", - "2 Sir Lancelot positive 0.835873 0.830106 \n", - "3 image of W. G. Grace positive 0.721918 0.736080 \n", - "4 Sir Galahad positive 0.835873 0.638135 \n", + "0 Sir Bedevere positive 0.835873 0.884359 \n", + "1 King Arthur neutral 0.000000 0.850874 \n", + "2 Sir Lancelot positive 0.835873 0.823645 \n", + "3 image of W. G. 
Grace positive 0.721919 0.722026 \n", + "4 musical number neutral 0.000000 0.621432 \n", "\n", " emotion.sadness emotion.joy emotion.fear emotion.disgust emotion.anger \\\n", - "0 0.046902 0.810654 0.016340 0.095661 0.021033 \n", - "1 0.062558 0.620066 0.054894 0.088147 0.182329 \n", - "2 0.046902 0.810654 0.016340 0.095661 0.021033 \n", - "3 0.047242 0.614332 0.159497 0.040378 0.155298 \n", - "4 0.046902 0.810654 0.016340 0.095661 0.021033 \n", + "0 0.031301 0.496318 0.135650 0.015545 0.022961 \n", + "1 0.441230 0.330559 0.043714 0.020016 0.025905 \n", + "2 0.031301 0.496318 0.135650 0.015545 0.022961 \n", + "3 0.044130 0.901205 0.039773 0.012838 0.027599 \n", + "4 0.312246 0.174343 0.032726 0.077707 0.045592 \n", "\n", " count \n", "0 1 \n", @@ -5118,19 +5112,19 @@ " \n", " \n", " 0\n", - " men\n", + " for men\n", " In AD 932, King Arthur and his squire, Patsy, ...\n", " the Knights of the Round Table\n", " join\n", - " future\n", - " to join\n", - " to join\n", + " infinitive\n", + " join\n", + " join\n", " \n", " \n", " 1\n", " he\n", - " Along the way, he recruits Sir Bedevere the W...\n", - " Sir Bedevere the Wise\n", + " Along the way, he recruits Sir Bedevere the Wi...\n", + " Sir Bedevere the Wise, Sir Lancelot the Brave,...\n", " recruit\n", " present\n", " recruits\n", @@ -5139,8 +5133,8 @@ " \n", " 2\n", " Arthur\n", - " Arthur leads the men to Camelot, but upon fur...\n", - " the men\n", + " Arthur leads the men to Camelot, but upon furt...\n", + " the men to Camelot\n", " lead\n", " present\n", " leads\n", @@ -5149,8 +5143,8 @@ " \n", " 3\n", " he\n", - " Arthur leads the men to Camelot, but upon fur...\n", - " not to go there\n", + " Arthur leads the men to Camelot, but upon furt...\n", + " not to go there because it is \"a silly place\"\n", " decide\n", " present\n", " decides\n", @@ -5159,12 +5153,12 @@ " \n", " 4\n", " he\n", - " Arthur leads the men to Camelot, but upon fur...\n", - " a musical number)\n", + " Arthur leads the men to Camelot, but 
upon furt...\n", + " None\n", + " go\n", + " infinitive\n", + " go\n", " go\n", - " future\n", - " to go\n", - " to go\n", " \n", " \n", "\n", @@ -5172,25 +5166,25 @@ ], "text/plain": [ " subject.text sentence \\\n", - "0 men In AD 932, King Arthur and his squire, Patsy, ... \n", - "1 he Along the way, he recruits Sir Bedevere the W... \n", - "2 Arthur Arthur leads the men to Camelot, but upon fur... \n", - "3 he Arthur leads the men to Camelot, but upon fur... \n", - "4 he Arthur leads the men to Camelot, but upon fur... \n", - "\n", - " object.text action.verb.text action.verb.tense \\\n", - "0 the Knights of the Round Table join future \n", - "1 Sir Bedevere the Wise recruit present \n", - "2 the men lead present \n", - "3 not to go there decide present \n", - "4 a musical number) go future \n", - "\n", - " action.text action.normalized \n", - "0 to join to join \n", - "1 recruits recruit \n", - "2 leads lead \n", - "3 decides decide \n", - "4 to go to go " + "0 for men In AD 932, King Arthur and his squire, Patsy, ... \n", + "1 he Along the way, he recruits Sir Bedevere the Wi... \n", + "2 Arthur Arthur leads the men to Camelot, but upon furt... \n", + "3 he Arthur leads the men to Camelot, but upon furt... \n", + "4 he Arthur leads the men to Camelot, but upon furt... \n", + "\n", + " object.text action.verb.text \\\n", + "0 the Knights of the Round Table join \n", + "1 Sir Bedevere the Wise, Sir Lancelot the Brave,... 
recruit \n", + "2 the men to Camelot lead \n", + "3 not to go there because it is \"a silly place\" decide \n", + "4 None go \n", + "\n", + " action.verb.tense action.text action.normalized \n", + "0 infinitive join join \n", + "1 present recruits recruit \n", + "2 present leads lead \n", + "3 present decides decide \n", + "4 infinitive go go " ] }, "execution_count": 35, @@ -5226,7 +5220,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.8.17" } }, "nbformat": 4, diff --git a/notebooks/Integrate_NLP_Libraries.ipynb b/notebooks/Integrate_NLP_Libraries.ipynb index 272b0eb6..62dfb9ad 100644 --- a/notebooks/Integrate_NLP_Libraries.ipynb +++ b/notebooks/Integrate_NLP_Libraries.ipynb @@ -174,7 +174,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -225,7 +225,7 @@ } ], "source": [ - "from IPython.core.display import display, HTML\n", + "from IPython.display import display, HTML\n", "doc_file = \"../resources/holy_grail_short.txt\"\n", "with open(doc_file, \"r\") as f:\n", " doc_text = f.read()\n", @@ -1949,8 +1949,8 @@ " compound\n", " 5\n", " Xxxx\n", - " O\n", - " \n", + " B\n", + " PERSON\n", " True\n", " False\n", " [0, 129): 'In AD 932, King Arthur and his squi...\n", @@ -1997,8 +1997,8 @@ " det\n", " 145\n", " xxx\n", - " O\n", - " \n", + " B\n", + " FAC\n", " True\n", " True\n", " [513, 629): 'As they turn away, God (an image ...\n", @@ -2013,8 +2013,8 @@ " compound\n", " 145\n", " Xxxx\n", - " O\n", - " \n", + " I\n", + " FAC\n", " True\n", " False\n", " [513, 629): 'As they turn away, God (an image ...\n", @@ -2029,8 +2029,8 @@ " dobj\n", " 142\n", " Xxxxx\n", - " O\n", - " \n", + " I\n", + " FAC\n", " True\n", " False\n", " [513, 629): 'As they turn away, God (an image ...\n", @@ -2075,12 +2075,12 @@ "1 B DATE True False \n", "2 I DATE False False \n", "3 O False False \n", - "4 O True False \n", + "4 B PERSON True False \n", ".. ... ... ... ... 
\n", "142 O True False \n", - "143 O True True \n", - "144 O True False \n", - "145 O True False \n", + "143 B FAC True True \n", + "144 I FAC True False \n", + "145 I FAC True False \n", "146 O False False \n", "\n", " sentence \n", @@ -2198,14 +2198,14 @@ " Galahad\n", " PROPN\n", " NNP\n", - " npadvmod\n", - " 32\n", + " appos\n", + " 39\n", " Xxxxx\n", " B\n", " PERSON\n", " True\n", " False\n", - " [130, 235): 'Along the way, he recruits Sir Be...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 45\n", @@ -2217,27 +2217,27 @@ " det\n", " 46\n", " xxx\n", - " I\n", - " PERSON\n", + " O\n", + " \n", " True\n", " True\n", - " [130, 235): 'Along the way, he recruits Sir Be...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 46\n", " 46\n", " [220, 224): 'Pure'\n", - " Pure\n", - " PROPN\n", - " NNP\n", + " pure\n", + " ADJ\n", + " JJ\n", " appos\n", " 44\n", " Xxxx\n", - " I\n", - " PERSON\n", + " O\n", + " \n", " True\n", " False\n", - " [130, 235): 'Along the way, he recruits Sir Be...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 47\n", @@ -2247,13 +2247,13 @@ " PUNCT\n", " ,\n", " punct\n", - " 46\n", + " 39\n", " ,\n", " O\n", " \n", " False\n", " False\n", - " [130, 235): 'Along the way, he recruits Sir Be...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 48\n", @@ -2269,7 +2269,7 @@ " \n", " True\n", " False\n", - " [130, 235): 'Along the way, he recruits Sir Be...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 49\n", @@ -2279,13 +2279,13 @@ " PROPN\n", " NNP\n", " appos\n", - " 46\n", + " 39\n", " Xxxxx\n", " B\n", " PERSON\n", " True\n", " False\n", - " [130, 235): 'Along the way, he recruits Sir Be...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 50\n", @@ -2295,13 +2295,13 @@ " DET\n", " DT\n", " det\n", - " 57\n", + " 63\n", " xxx\n", " O\n", " \n", " True\n", " True\n", 
- " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 51\n", @@ -2317,7 +2317,7 @@ " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 52\n", @@ -2333,23 +2333,23 @@ " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 53\n", " 53\n", " [244, 249): 'Quite'\n", - " Quite\n", - " PROPN\n", - " NNP\n", - " compound\n", - " 55\n", + " quite\n", + " VERB\n", + " VB\n", + " nmod\n", + " 63\n", " Xxxxx\n", " O\n", " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 54\n", @@ -2359,21 +2359,21 @@ " PUNCT\n", " HYPH\n", " punct\n", - " 55\n", + " 53\n", " -\n", " O\n", " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 55\n", " 55\n", " [250, 252): 'So'\n", " so\n", - " ADV\n", - " RB\n", + " SCONJ\n", + " IN\n", " advmod\n", " 57\n", " Xx\n", @@ -2381,7 +2381,7 @@ " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 56\n", @@ -2397,23 +2397,23 @@ " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 57\n", " 57\n", " [253, 258): 'Brave'\n", " brave\n", - " NOUN\n", - " NN\n", - " ROOT\n", - " 57\n", + " VERB\n", + " VB\n", + " pobj\n", + " 53\n", " Xxxxx\n", " O\n", " \n", " True\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 58\n", @@ -2429,7 +2429,7 
@@ " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 59\n", @@ -2445,7 +2445,7 @@ " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 60\n", @@ -2461,23 +2461,23 @@ " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 61\n", " 61\n", " [262, 265): 'Sir'\n", - " Sir\n", - " PROPN\n", - " NNP\n", - " compound\n", - " 63\n", + " sir\n", + " NOUN\n", + " NN\n", + " pobj\n", + " 59\n", " Xxx\n", " O\n", " \n", " True\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 62\n", @@ -2493,7 +2493,7 @@ " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 63\n", @@ -2502,14 +2502,14 @@ " Lancelot\n", " PROPN\n", " NNP\n", - " pobj\n", - " 59\n", + " appos\n", + " 49\n", " Xxxxx\n", " O\n", " \n", " True\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 64\n", @@ -2519,13 +2519,13 @@ " PUNCT\n", " ,\n", " punct\n", - " 57\n", + " 39\n", " ,\n", " O\n", " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 65\n", @@ -2535,13 +2535,13 @@ " CCONJ\n", " CC\n", " cc\n", - " 57\n", + " 39\n", " xxx\n", " O\n", " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 66\n", @@ -2550,14 +2550,14 @@ " Sir\n", " PROPN\n", " NNP\n", - " compound\n", + " 
npadvmod\n", " 69\n", " Xxx\n", " O\n", " \n", " True\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 67\n", @@ -2573,7 +2573,7 @@ " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 68\n", @@ -2589,23 +2589,23 @@ " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 69\n", " 69\n", " [288, 297): 'Appearing'\n", - " appearing\n", - " NOUN\n", - " NN\n", + " appear\n", + " VERB\n", + " VBG\n", " conj\n", - " 57\n", + " 39\n", " Xxxxx\n", " O\n", " \n", " True\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 70\n", @@ -2621,7 +2621,7 @@ " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 71\n", @@ -2637,7 +2637,7 @@ " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 72\n", @@ -2653,23 +2653,23 @@ " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 73\n", " 73\n", " [301, 305): 'this'\n", " this\n", - " DET\n", + " PRON\n", " DT\n", - " det\n", - " 75\n", + " pobj\n", + " 71\n", " xxxx\n", " O\n", " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 74\n", @@ -2685,7 +2685,7 @@ " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " 
\n", " \n", " 75\n", @@ -2694,14 +2694,14 @@ " Film\n", " PROPN\n", " NNP\n", - " pobj\n", - " 71\n", + " appos\n", + " 69\n", " Xxxx\n", " O\n", " \n", " True\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 76\n", @@ -2717,7 +2717,7 @@ " \n", " False\n", " False\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 77\n", @@ -2727,13 +2727,13 @@ " ADP\n", " IN\n", " prep\n", - " 57\n", + " 69\n", " xxxx\n", " O\n", " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 78\n", @@ -2749,7 +2749,7 @@ " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", " 79\n", @@ -2765,126 +2765,126 @@ " \n", " True\n", " True\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id span lemma pos tag dep head \\\n", - "44 44 [208, 215): 'Galahad' Galahad PROPN NNP npadvmod 32 \n", - "45 45 [216, 219): 'the' the DET DT det 46 \n", - "46 46 [220, 224): 'Pure' Pure PROPN NNP appos 44 \n", - "47 47 [224, 225): ',' , PUNCT , punct 46 \n", - "48 48 [226, 229): 'Sir' Sir PROPN NNP compound 49 \n", - "49 49 [230, 235): 'Robin' Robin PROPN NNP appos 46 \n", - "50 50 [236, 239): 'the' the DET DT det 57 \n", - "51 51 [240, 243): 'Not' not PART RB neg 53 \n", - "52 52 [243, 244): '-' - PUNCT HYPH punct 53 \n", - "53 53 [244, 249): 'Quite' Quite PROPN NNP compound 55 \n", - "54 54 [249, 250): '-' - PUNCT HYPH punct 55 \n", - "55 55 [250, 252): 'So' so ADV RB advmod 57 \n", - "56 56 [252, 253): '-' - PUNCT HYPH punct 57 \n", - "57 57 [253, 258): 'Brave' brave NOUN NN ROOT 57 \n", - "58 58 [258, 
259): '-' - PUNCT HYPH punct 57 \n", - "59 59 [259, 261): 'as' as ADP IN prep 57 \n", - "60 60 [261, 262): '-' - PUNCT HYPH punct 59 \n", - "61 61 [262, 265): 'Sir' Sir PROPN NNP compound 63 \n", - "62 62 [265, 266): '-' - PUNCT HYPH punct 63 \n", - "63 63 [266, 274): 'Lancelot' Lancelot PROPN NNP pobj 59 \n", - "64 64 [274, 275): ',' , PUNCT , punct 57 \n", - "65 65 [276, 279): 'and' and CCONJ CC cc 57 \n", - "66 66 [280, 283): 'Sir' Sir PROPN NNP compound 69 \n", - "67 67 [284, 287): 'Not' not PART RB neg 69 \n", - "68 68 [287, 288): '-' - PUNCT HYPH punct 69 \n", - "69 69 [288, 297): 'Appearing' appearing NOUN NN conj 57 \n", - "70 70 [297, 298): '-' - PUNCT HYPH punct 69 \n", - "71 71 [298, 300): 'in' in ADP IN prep 69 \n", - "72 72 [300, 301): '-' - PUNCT HYPH punct 71 \n", - "73 73 [301, 305): 'this' this DET DT det 75 \n", - "74 74 [305, 306): '-' - PUNCT HYPH punct 75 \n", - "75 75 [306, 310): 'Film' Film PROPN NNP pobj 71 \n", - "76 76 [310, 311): ',' , PUNCT , punct 69 \n", - "77 77 [312, 317): 'along' along ADP IN prep 57 \n", - "78 78 [318, 322): 'with' with ADP IN prep 77 \n", - "79 79 [323, 328): 'their' their PRON PRP$ poss 80 \n", - "\n", - " shape ent_iob ent_type is_alpha is_stop \\\n", - "44 Xxxxx B PERSON True False \n", - "45 xxx I PERSON True True \n", - "46 Xxxx I PERSON True False \n", - "47 , O False False \n", - "48 Xxx O True False \n", - "49 Xxxxx B PERSON True False \n", - "50 xxx O True True \n", - "51 Xxx O True True \n", - "52 - O False False \n", - "53 Xxxxx O True True \n", - "54 - O False False \n", - "55 Xx O True True \n", - "56 - O False False \n", - "57 Xxxxx O True False \n", - "58 - O False False \n", - "59 xx O True True \n", - "60 - O False False \n", - "61 Xxx O True False \n", - "62 - O False False \n", - "63 Xxxxx O True False \n", - "64 , O False False \n", - "65 xxx O True True \n", - "66 Xxx O True False \n", - "67 Xxx O True True \n", - "68 - O False False \n", - "69 Xxxxx O True False \n", - "70 - O False False 
\n", - "71 xx O True True \n", - "72 - O False False \n", - "73 xxxx O True True \n", - "74 - O False False \n", - "75 Xxxx O True False \n", - "76 , O False False \n", - "77 xxxx O True True \n", - "78 xxxx O True True \n", - "79 xxxx O True True \n", + " id span lemma pos tag dep head shape \\\n", + "44 44 [208, 215): 'Galahad' Galahad PROPN NNP appos 39 Xxxxx \n", + "45 45 [216, 219): 'the' the DET DT det 46 xxx \n", + "46 46 [220, 224): 'Pure' pure ADJ JJ appos 44 Xxxx \n", + "47 47 [224, 225): ',' , PUNCT , punct 39 , \n", + "48 48 [226, 229): 'Sir' Sir PROPN NNP compound 49 Xxx \n", + "49 49 [230, 235): 'Robin' Robin PROPN NNP appos 39 Xxxxx \n", + "50 50 [236, 239): 'the' the DET DT det 63 xxx \n", + "51 51 [240, 243): 'Not' not PART RB neg 53 Xxx \n", + "52 52 [243, 244): '-' - PUNCT HYPH punct 53 - \n", + "53 53 [244, 249): 'Quite' quite VERB VB nmod 63 Xxxxx \n", + "54 54 [249, 250): '-' - PUNCT HYPH punct 53 - \n", + "55 55 [250, 252): 'So' so SCONJ IN advmod 57 Xx \n", + "56 56 [252, 253): '-' - PUNCT HYPH punct 57 - \n", + "57 57 [253, 258): 'Brave' brave VERB VB pobj 53 Xxxxx \n", + "58 58 [258, 259): '-' - PUNCT HYPH punct 57 - \n", + "59 59 [259, 261): 'as' as ADP IN prep 57 xx \n", + "60 60 [261, 262): '-' - PUNCT HYPH punct 59 - \n", + "61 61 [262, 265): 'Sir' sir NOUN NN pobj 59 Xxx \n", + "62 62 [265, 266): '-' - PUNCT HYPH punct 63 - \n", + "63 63 [266, 274): 'Lancelot' Lancelot PROPN NNP appos 49 Xxxxx \n", + "64 64 [274, 275): ',' , PUNCT , punct 39 , \n", + "65 65 [276, 279): 'and' and CCONJ CC cc 39 xxx \n", + "66 66 [280, 283): 'Sir' Sir PROPN NNP npadvmod 69 Xxx \n", + "67 67 [284, 287): 'Not' not PART RB neg 69 Xxx \n", + "68 68 [287, 288): '-' - PUNCT HYPH punct 69 - \n", + "69 69 [288, 297): 'Appearing' appear VERB VBG conj 39 Xxxxx \n", + "70 70 [297, 298): '-' - PUNCT HYPH punct 69 - \n", + "71 71 [298, 300): 'in' in ADP IN prep 69 xx \n", + "72 72 [300, 301): '-' - PUNCT HYPH punct 71 - \n", + "73 73 [301, 305): 'this' this PRON DT 
pobj 71 xxxx \n", + "74 74 [305, 306): '-' - PUNCT HYPH punct 75 - \n", + "75 75 [306, 310): 'Film' Film PROPN NNP appos 69 Xxxx \n", + "76 76 [310, 311): ',' , PUNCT , punct 69 , \n", + "77 77 [312, 317): 'along' along ADP IN prep 69 xxxx \n", + "78 78 [318, 322): 'with' with ADP IN prep 77 xxxx \n", + "79 79 [323, 328): 'their' their PRON PRP$ poss 80 xxxx \n", + "\n", + " ent_iob ent_type is_alpha is_stop \\\n", + "44 B PERSON True False \n", + "45 O True True \n", + "46 O True False \n", + "47 O False False \n", + "48 O True False \n", + "49 B PERSON True False \n", + "50 O True True \n", + "51 O True True \n", + "52 O False False \n", + "53 O True True \n", + "54 O False False \n", + "55 O True True \n", + "56 O False False \n", + "57 O True False \n", + "58 O False False \n", + "59 O True True \n", + "60 O False False \n", + "61 O True False \n", + "62 O False False \n", + "63 O True False \n", + "64 O False False \n", + "65 O True True \n", + "66 O True False \n", + "67 O True True \n", + "68 O False False \n", + "69 O True False \n", + "70 O False False \n", + "71 O True True \n", + "72 O False False \n", + "73 O True True \n", + "74 O False False \n", + "75 O True False \n", + "76 O False False \n", + "77 O True True \n", + "78 O True True \n", + "79 O True True \n", "\n", " sentence \n", - "44 [130, 235): 'Along the way, he recruits Sir Be... \n", - "45 [130, 235): 'Along the way, he recruits Sir Be... \n", - "46 [130, 235): 'Along the way, he recruits Sir Be... \n", - "47 [130, 235): 'Along the way, he recruits Sir Be... \n", - "48 [130, 235): 'Along the way, he recruits Sir Be... \n", - "49 [130, 235): 'Along the way, he recruits Sir Be... \n", - "50 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "51 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "52 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "53 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "54 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... 
\n", - "55 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "56 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "57 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "58 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "59 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "60 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "61 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "62 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "63 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "64 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "65 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "66 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "67 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "68 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "69 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "70 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "71 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "72 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "73 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "74 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "75 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "76 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "77 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "78 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... \n", - "79 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan... " + "44 [130, 361): 'Along the way, he recruits Sir Be... \n", + "45 [130, 361): 'Along the way, he recruits Sir Be... \n", + "46 [130, 361): 'Along the way, he recruits Sir Be... \n", + "47 [130, 361): 'Along the way, he recruits Sir Be... \n", + "48 [130, 361): 'Along the way, he recruits Sir Be... \n", + "49 [130, 361): 'Along the way, he recruits Sir Be... \n", + "50 [130, 361): 'Along the way, he recruits Sir Be... 
\n", + "51 [130, 361): 'Along the way, he recruits Sir Be... \n", + "52 [130, 361): 'Along the way, he recruits Sir Be... \n", + "53 [130, 361): 'Along the way, he recruits Sir Be... \n", + "54 [130, 361): 'Along the way, he recruits Sir Be... \n", + "55 [130, 361): 'Along the way, he recruits Sir Be... \n", + "56 [130, 361): 'Along the way, he recruits Sir Be... \n", + "57 [130, 361): 'Along the way, he recruits Sir Be... \n", + "58 [130, 361): 'Along the way, he recruits Sir Be... \n", + "59 [130, 361): 'Along the way, he recruits Sir Be... \n", + "60 [130, 361): 'Along the way, he recruits Sir Be... \n", + "61 [130, 361): 'Along the way, he recruits Sir Be... \n", + "62 [130, 361): 'Along the way, he recruits Sir Be... \n", + "63 [130, 361): 'Along the way, he recruits Sir Be... \n", + "64 [130, 361): 'Along the way, he recruits Sir Be... \n", + "65 [130, 361): 'Along the way, he recruits Sir Be... \n", + "66 [130, 361): 'Along the way, he recruits Sir Be... \n", + "67 [130, 361): 'Along the way, he recruits Sir Be... \n", + "68 [130, 361): 'Along the way, he recruits Sir Be... \n", + "69 [130, 361): 'Along the way, he recruits Sir Be... \n", + "70 [130, 361): 'Along the way, he recruits Sir Be... \n", + "71 [130, 361): 'Along the way, he recruits Sir Be... \n", + "72 [130, 361): 'Along the way, he recruits Sir Be... \n", + "73 [130, 361): 'Along the way, he recruits Sir Be... \n", + "74 [130, 361): 'Along the way, he recruits Sir Be... \n", + "75 [130, 361): 'Along the way, he recruits Sir Be... \n", + "76 [130, 361): 'Along the way, he recruits Sir Be... \n", + "77 [130, 361): 'Along the way, he recruits Sir Be... \n", + "78 [130, 361): 'Along the way, he recruits Sir Be... \n", + "79 [130, 361): 'Along the way, he recruits Sir Be... 
" ] }, "execution_count": 21, @@ -2938,11 +2938,7 @@ " \n", " \n", " 44\n", - " [130, 235): 'Along the way, he recruits Sir Be...\n", - " \n", - " \n", - " 50\n", - " [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan...\n", + " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", "\n", @@ -2950,8 +2946,7 @@ ], "text/plain": [ " sentence\n", - "44 [130, 235): 'Along the way, he recruits Sir Be...\n", - "50 [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lan..." + "44 [130, 361): 'Along the way, he recruits Sir Be..." ] }, "execution_count": 22, @@ -3622,23 +3617,12 @@ " \n", " 0\n", " 130\n", - " 235\n", - "\n", - " 27\n", - " 50\n", - "\n", - " Along the way, he recruits Sir Bedevere the Wise, Sir Lancelot the Brave, Sir Galahad the Pure, Sir Robin\n", - " \n", - "\n", - " \n", - " 1\n", - " 236\n", " 361\n", "\n", - " 50\n", + " 27\n", " 86\n", "\n", - " the Not-Quite-So-Brave-as-Sir-Lancelot, and Sir Not-Appearing-in-this-Film, along with their squires and Robin's troubadours.\n", + " Along the way, he recruits Sir Bedevere the Wise, Sir Lancelot the Brave, Sir Galahad the Pure, Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot, and Sir Not-Appearing-in-this-Film, along with their squires and Robin's troubadours.\n", " \n", "\n", " \n", @@ -3650,11 +3634,7 @@ "\n", " In AD 932, King Arthur and his squire, Patsy, travel throughout Britain searching for men to join the Knights of the Round Table. 
\n", "\n", - " Along the way, he recruits Sir Bedevere the Wise, Sir Lancelot the Brave, Sir Galahad the Pure, Sir Robin\n", - "\n", - "\n", - "\n", - " the Not-Quite-So-Brave-as-Sir-Lancelot, and Sir Not-Appearing-in-this-Film, along with their squires and Robin's troubadours.\n", + " Along the way, he recruits Sir Bedevere the Wise, Sir Lancelot the Brave, Sir Galahad the Pure, Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot, and Sir Not-Appearing-in-this-Film, along with their squires and Robin's troubadours.\n", " Arthur leads the men to Camelot, but upon further consideration (thanks to a musical number) he decides not to go there because it is "a silly place". As they turn away, God (an image of W. G. Grace) speaks to them and gives Arthur the task of finding the Holy Grail.\n", "

\n", " \n", @@ -3669,10 +3649,10 @@ "\n", " {\n", "\n", - " const doc_spans = [[130,235],[236,361]]\n", + " const doc_spans = [[130,361]]\n", " const doc_text = 'In AD 932, King Arthur and his squire, Patsy, travel throughout Britain searching for men to join the Knights of the Round Table. Along the way, he recruits Sir Bedevere the Wise, Sir Lancelot the Brave, Sir Galahad the Pure, Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot, and Sir Not-Appearing-in-this-Film, along with their squires and Robin\\'s troubadours. Arthur leads the men to Camelot, but upon further consideration (thanks to a musical number) he decides not to go there because it is \"a silly place\". As they turn away, God (an image of W. G. Grace) speaks to them and gives Arthur the task of finding the Holy Grail.'\n", "\n", - " const doc_token_spans = [[27,50],[50,86]]\n", + " const doc_token_spans = [[27,86]]\n", " documents.push({doc_text: doc_text, doc_spans: doc_spans, doc_token_spans: doc_token_spans})\n", "\n", " }\n", @@ -3685,8 +3665,8 @@ ], "text/plain": [ "\n", - "[[130, 235): 'Along the way, he recruits Sir Bedevere the Wise, Sir Lancelot the Brave, [...]', [236, 361): 'the Not-Quite-So-Brave-as-Sir-Lancelot, and Sir Not-Appearing-in-this- [...]']\n", - "Length: 2, dtype: TokenSpanDtype" + "[[130, 361): 'Along the way, he recruits Sir Bedevere the Wise, Sir Lancelot the Brave, [...]']\n", + "Length: 1, dtype: TokenSpanDtype" ] }, "execution_count": 23, @@ -3792,8 +3772,8 @@ " \n", " 51\n", " [240, 243): 'Not'\n", - " ADV\n", - " not\n", + " PROPN\n", + " None\n", " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", @@ -3834,8 +3814,8 @@ " \n", " 57\n", " [253, 258): 'Brave'\n", - " ADJ\n", - " brave\n", + " PROPN\n", + " Brave\n", " [130, 361): 'Along the way, he recruits Sir Be...\n", " \n", " \n", @@ -4005,13 +3985,13 @@ "48 [226, 229): 'Sir' PROPN Sir \n", "49 [230, 235): 'Robin' PROPN Robin \n", "50 [236, 239): 'the' DET the \n", - "51 [240, 243): 'Not' ADV not 
\n", + "51 [240, 243): 'Not' PROPN None \n", "52 [243, 244): '-' PUNCT None \n", "53 [244, 249): 'Quite' PROPN None \n", "54 [249, 250): '-' PUNCT None \n", "55 [250, 252): 'So' ADV so \n", "56 [252, 253): '-' PUNCT None \n", - "57 [253, 258): 'Brave' ADJ brave \n", + "57 [253, 258): 'Brave' PROPN Brave \n", "58 [258, 259): '-' PUNCT None \n", "59 [259, 261): 'as' ADP as \n", "60 [261, 262): '-' PUNCT None \n", @@ -4861,8 +4841,8 @@ " Galahad\n", " PROPN\n", " NNP\n", - " npadvmod\n", - " 32\n", + " appos\n", + " 39\n", " Xxxxx\n", " B\n", " PERSON\n", @@ -4880,8 +4860,8 @@ " det\n", " 46\n", " xxx\n", - " I\n", - " PERSON\n", + " O\n", + " \n", " True\n", " True\n", " [130, 361): 'Along the way, he recruits Sir Be...\n", @@ -4890,14 +4870,14 @@ " 46\n", " 46\n", " [220, 224): 'Pure'\n", - " Pure\n", - " PROPN\n", - " NNP\n", + " pure\n", + " ADJ\n", + " JJ\n", " appos\n", " 44\n", " Xxxx\n", - " I\n", - " PERSON\n", + " O\n", + " \n", " True\n", " False\n", " [130, 361): 'Along the way, he recruits Sir Be...\n", @@ -4910,7 +4890,7 @@ " PUNCT\n", " ,\n", " punct\n", - " 46\n", + " 39\n", " ,\n", " O\n", " \n", @@ -4942,7 +4922,7 @@ " PROPN\n", " NNP\n", " appos\n", - " 46\n", + " 39\n", " Xxxxx\n", " B\n", " PERSON\n", @@ -4958,7 +4938,7 @@ " DET\n", " DT\n", " det\n", - " 57\n", + " 63\n", " xxx\n", " O\n", " \n", @@ -5002,11 +4982,11 @@ " 53\n", " 53\n", " [244, 249): 'Quite'\n", - " Quite\n", - " PROPN\n", - " NNP\n", - " compound\n", - " 55\n", + " quite\n", + " VERB\n", + " VB\n", + " nmod\n", + " 63\n", " Xxxxx\n", " O\n", " \n", @@ -5020,21 +5000,21 @@ ], "text/plain": [ " id span lemma pos tag dep head shape \\\n", - "44 44 [208, 215): 'Galahad' Galahad PROPN NNP npadvmod 32 Xxxxx \n", + "44 44 [208, 215): 'Galahad' Galahad PROPN NNP appos 39 Xxxxx \n", "45 45 [216, 219): 'the' the DET DT det 46 xxx \n", - "46 46 [220, 224): 'Pure' Pure PROPN NNP appos 44 Xxxx \n", - "47 47 [224, 225): ',' , PUNCT , punct 46 , \n", + "46 46 [220, 224): 'Pure' pure ADJ JJ 
appos 44 Xxxx \n", + "47 47 [224, 225): ',' , PUNCT , punct 39 , \n", "48 48 [226, 229): 'Sir' Sir PROPN NNP compound 49 Xxx \n", - "49 49 [230, 235): 'Robin' Robin PROPN NNP appos 46 Xxxxx \n", - "50 50 [236, 239): 'the' the DET DT det 57 xxx \n", + "49 49 [230, 235): 'Robin' Robin PROPN NNP appos 39 Xxxxx \n", + "50 50 [236, 239): 'the' the DET DT det 63 xxx \n", "51 51 [240, 243): 'Not' not PART RB neg 53 Xxx \n", "52 52 [243, 244): '-' - PUNCT HYPH punct 53 - \n", - "53 53 [244, 249): 'Quite' Quite PROPN NNP compound 55 Xxxxx \n", + "53 53 [244, 249): 'Quite' quite VERB VB nmod 63 Xxxxx \n", "\n", " ent_iob ent_type is_alpha is_stop \\\n", "44 B PERSON True False \n", - "45 I PERSON True True \n", - "46 I PERSON True False \n", + "45 O True True \n", + "46 O True False \n", "47 O False False \n", "48 O True False \n", "49 B PERSON True False \n", @@ -5090,449 +5070,417 @@ { "data": { "text/html": [ - "\n", - "\n", + "\n", + "\n", " Galahad\n", " NNP\n", "\n", "\n", - "\n", + "\n", " the\n", " DT\n", "\n", "\n", - "\n", + "\n", " Pure\n", - " NNP\n", + " JJ\n", "\n", "\n", - "\n", + "\n", " ,\n", " ,\n", "\n", "\n", - "\n", + "\n", " Sir\n", " NNP\n", "\n", "\n", - "\n", + "\n", " Robin\n", " NNP\n", "\n", "\n", - "\n", + "\n", " the\n", " DT\n", "\n", "\n", - "\n", + "\n", " Not\n", " RB\n", "\n", "\n", - "\n", + "\n", " -\n", " HYPH\n", "\n", "\n", - "\n", + "\n", " Quite\n", - " NNP\n", + " VB\n", "\n", "\n", - "\n", + "\n", " -\n", " HYPH\n", "\n", "\n", - "\n", + "\n", " So\n", - " RB\n", + " IN\n", "\n", "\n", - "\n", + "\n", " -\n", " HYPH\n", "\n", "\n", - "\n", + "\n", " Brave\n", - " NN\n", + " VB\n", "\n", "\n", - "\n", + "\n", " -\n", " HYPH\n", "\n", "\n", - "\n", + "\n", " as\n", " IN\n", "\n", "\n", - "\n", + "\n", " -\n", " HYPH\n", "\n", "\n", - "\n", + "\n", " Sir\n", - " NNP\n", + " NN\n", "\n", "\n", - "\n", + "\n", " -\n", " HYPH\n", "\n", "\n", - "\n", + "\n", " Lancelot\n", " NNP\n", "\n", "\n", - "\n", + "\n", " ,\n", " ,\n", "\n", "\n", 
- "\n", + "\n", " and\n", " CC\n", "\n", "\n", - "\n", + "\n", " Sir\n", " NNP\n", "\n", "\n", - "\n", + "\n", " Not\n", " RB\n", "\n", "\n", - "\n", + "\n", " -\n", " HYPH\n", "\n", "\n", - "\n", + "\n", " Appearing\n", - " NN\n", + " VBG\n", "\n", "\n", - "\n", + "\n", " -\n", " HYPH\n", "\n", "\n", - "\n", + "\n", " in\n", " IN\n", "\n", "\n", - "\n", + "\n", " -\n", " HYPH\n", "\n", "\n", - "\n", + "\n", " this\n", " DT\n", "\n", "\n", - "\n", + "\n", " -\n", " HYPH\n", "\n", "\n", - "\n", + "\n", " Film\n", " NNP\n", "\n", "\n", - "\n", + "\n", " ,\n", " ,\n", "\n", "\n", - "\n", + "\n", " along\n", " IN\n", "\n", "\n", - "\n", + "\n", " with\n", " IN\n", "\n", "\n", - "\n", + "\n", " their\n", " PRP$\n", "\n", "\n", "\n", - " \n", - " \n", - " det\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " appos\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " punct\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " compound\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", + " \n", " \n", - " appos\n", + " det\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " det\n", + " appos\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " neg\n", + " compound\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " det\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " compound\n", + " neg\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " advmod\n", + " nmod\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " advmod\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " prep\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", 
+ " \n", " \n", - " punct\n", + " pobj\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " compound\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " prep\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " pobj\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " pobj\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " cc\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " compound\n", + " appos\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " neg\n", + " npadvmod\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " neg\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " conj\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " prep\n", + " prep\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " det\n", + " pobj\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " pobj\n", + " appos\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " punct\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " prep\n", + " prep\n", " \n", - " \n", + " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " prep\n", + " prep\n", " \n", - " \n", + " \n", "\n", "" ], @@ -5583,7 +5531,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.8.17" }, "toc-autonumbering": false }, diff --git 
a/notebooks/Model_Training_with_BERT.ipynb b/notebooks/Model_Training_with_BERT.ipynb index bdc8520e..0ef137cd 100644 --- a/notebooks/Model_Training_with_BERT.ipynb +++ b/notebooks/Model_Training_with_BERT.ipynb @@ -1414,7 +1414,7 @@ " O\n", " <NA>\n", " O\n", - " [ -0.19854125, -0.46898478, 0.7755599...\n", + " [ -0.19854169, -0.46898514, 0.7755601...\n", " \n", " \n", " 11\n", @@ -1424,7 +1424,7 @@ " O\n", " <NA>\n", " O\n", - " [ -0.24190304, -0.42399377, 0.955406...\n", + " [ -0.24190396, -0.42399377, 0.9554063...\n", " \n", " \n", " 12\n", @@ -1434,7 +1434,7 @@ " O\n", " <NA>\n", " O\n", - " [ -0.20076738, -0.7481939, 1.302213...\n", + " [ -0.20076752, -0.7481933, 1.302213...\n", " \n", " \n", " 13\n", @@ -1444,7 +1444,7 @@ " B\n", " LOC\n", " B-LOC\n", - " [ 0.2020257, -0.26199907, 0.3297634...\n", + " [ 0.20202553, -0.26199815, 0.3297633...\n", " \n", " \n", " 14\n", @@ -1454,7 +1454,7 @@ " I\n", " LOC\n", " I-LOC\n", - " [ -0.5462166, -0.90924495, -0.05836733...\n", + " [ -0.5462168, -0.90924424, -0.0583674...\n", " \n", " \n", " 15\n", @@ -1464,7 +1464,7 @@ " I\n", " LOC\n", " I-LOC\n", - " [ -0.37400314, -0.6890743, -0.1446248...\n", + " [ -0.37400252, -0.6890734, -0.1446257...\n", " \n", " \n", " 16\n", @@ -1474,7 +1474,7 @@ " I\n", " LOC\n", " I-LOC\n", - " [ -0.46548596, -0.8717423, 0.3557480...\n", + " [ -0.46548516, -0.8717417, 0.3557479...\n", " \n", " \n", " 17\n", @@ -1484,7 +1484,7 @@ " I\n", " LOC\n", " I-LOC\n", - " [ -0.18682732, -0.9008188, 0.3601504...\n", + " [ -0.18682763, -0.90081865, 0.3601499...\n", " \n", " \n", " 18\n", @@ -1494,7 +1494,7 @@ " O\n", " <NA>\n", " O\n", - " [ -0.16640136, -0.8363809, 0.874061...\n", + " [ -0.16640103, -0.8363804, 0.8740610...\n", " \n", " \n", " 19\n", @@ -1504,7 +1504,7 @@ " B\n", " LOC\n", " B-LOC\n", - " [ -0.3024105, -0.8382667, 1.105809...\n", + " [ -0.30241105, -0.83826715, 1.105809...\n", " \n", " \n", "\n", @@ -1524,16 +1524,16 @@ "19 19 [31, 33): 'NE' 26546 B LOC B-LOC \n", "\n", " embedding 
\n", - "10 [ -0.19854125, -0.46898478, 0.7755599... \n", - "11 [ -0.24190304, -0.42399377, 0.955406... \n", - "12 [ -0.20076738, -0.7481939, 1.302213... \n", - "13 [ 0.2020257, -0.26199907, 0.3297634... \n", - "14 [ -0.5462166, -0.90924495, -0.05836733... \n", - "15 [ -0.37400314, -0.6890743, -0.1446248... \n", - "16 [ -0.46548596, -0.8717423, 0.3557480... \n", - "17 [ -0.18682732, -0.9008188, 0.3601504... \n", - "18 [ -0.16640136, -0.8363809, 0.874061... \n", - "19 [ -0.3024105, -0.8382667, 1.105809... " + "10 [ -0.19854169, -0.46898514, 0.7755601... \n", + "11 [ -0.24190396, -0.42399377, 0.9554063... \n", + "12 [ -0.20076752, -0.7481933, 1.302213... \n", + "13 [ 0.20202553, -0.26199815, 0.3297633... \n", + "14 [ -0.5462168, -0.90924424, -0.0583674... \n", + "15 [ -0.37400252, -0.6890734, -0.1446257... \n", + "16 [ -0.46548516, -0.8717417, 0.3557479... \n", + "17 [ -0.18682763, -0.90081865, 0.3601499... \n", + "18 [ -0.16640103, -0.8363804, 0.8740610... \n", + "19 [ -0.30241105, -0.83826715, 1.105809... 
" ] }, "execution_count": 12, @@ -1591,35 +1591,35 @@ " [155, 168): 'international'\n", " O\n", " <NA>\n", - " [ 0.23405041, -0.5534875, 0.9083985, ...\n", + " [ 0.23404993, -0.5534872, 0.9083986, ...\n", " \n", " \n", " 71\n", " [169, 176): 'between'\n", " O\n", " <NA>\n", - " [ 0.27792975, -0.6853796, 1.1050363, ...\n", + " [ 0.27793035, -0.68538034, 1.1050361, ...\n", " \n", " \n", " 72\n", " [177, 185): 'Pakistan'\n", " B\n", " LOC\n", - " [ 0.19718906, -0.46341094, 0.5182328, ...\n", + " [ 0.1971882, -0.4634109, 0.5182331, ...\n", " \n", " \n", " 73\n", " [186, 189): 'and'\n", " O\n", " <NA>\n", - " [ 0.20423545, -0.63758826, 0.82874423, ...\n", + " [ 0.20423535, -0.63758826, 0.82874435, ...\n", " \n", " \n", " 74\n", " [190, 193): 'New'\n", " B\n", " LOC\n", - " [ 0.28740737, -0.47174248, 0.77719426, ...\n", + " [ 0.2874066, -0.47174183, 0.7771955, ...\n", " \n", " \n", "\n", @@ -1634,11 +1634,11 @@ "74 [190, 193): 'New' B LOC \n", "\n", " embedding \n", - "70 [ 0.23405041, -0.5534875, 0.9083985, ... \n", - "71 [ 0.27792975, -0.6853796, 1.1050363, ... \n", - "72 [ 0.19718906, -0.46341094, 0.5182328, ... \n", - "73 [ 0.20423545, -0.63758826, 0.82874423, ... \n", - "74 [ 0.28740737, -0.47174248, 0.77719426, ... " + "70 [ 0.23404993, -0.5534872, 0.9083986, ... \n", + "71 [ 0.27793035, -0.68538034, 1.1050361, ... \n", + "72 [ 0.1971882, -0.4634109, 0.5182331, ... \n", + "73 [ 0.20423535, -0.63758826, 0.82874435, ... \n", + "74 [ 0.2874066, -0.47174183, 0.7771955, ... 
" ] }, "execution_count": 13, @@ -1658,7 +1658,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 14, @@ -1763,7 +1763,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.08307116, -0.35959044, 1.015067...\n", + " [ -0.08307081, -0.35959032, 1.015068...\n", " \n", " \n", " 1\n", @@ -1777,7 +1777,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.22862588, -0.49313605, 1.284232...\n", + " [ -0.22862603, -0.49313632, 1.28423...\n", " \n", " \n", " 2\n", @@ -1791,7 +1791,7 @@ " <NA>\n", " O\n", " 0\n", - " [ 0.028480446, -0.17874268, 1.54320...\n", + " [ 0.028480662, -0.17874284, 1.54320...\n", " \n", " \n", " 3\n", @@ -1805,7 +1805,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.46517605, -0.29836014, 1.073768...\n", + " [ -0.4651753, -0.29836023, 1.073767...\n", " \n", " \n", " 4\n", @@ -1819,7 +1819,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.10730826, -0.3372096, 1.226979...\n", + " [ -0.10730811, -0.33720982, 1.226979...\n", " \n", " \n", " ...\n", @@ -1847,7 +1847,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.12806588, -0.002324244, 0.6781316...\n", + " [ -0.1280663, -0.0023243837, 0.678132...\n", " \n", " \n", " 685\n", @@ -1861,7 +1861,7 @@ " <NA>\n", " O\n", " 0\n", - " [ 0.30534068, -0.52625746, 0.8281702...\n", + " [ 0.3053407, -0.52625775, 0.8281702...\n", " \n", " \n", " 686\n", @@ -1875,7 +1875,7 @@ " LOC\n", " B-LOC\n", " 1\n", - " [ -0.04873929, -0.3379735, -0.0583514...\n", + " [ -0.048738778, -0.33797324, -0.0583509...\n", " \n", " \n", " 687\n", @@ -1889,7 +1889,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.0052893925, -0.29743084, 0.716173...\n", + " [ -0.005289644, -0.29743072, 0.716173...\n", " \n", " \n", " 688\n", @@ -1903,7 +1903,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.5030238, 0.36253875, 0.731493...\n", + " [ -0.50302404, 0.36253828, 0.7314933...\n", " \n", " \n", "\n", @@ -1938,17 +1938,17 @@ "688 1 True O O \n", "\n", " token_class_id embedding \n", - "0 0 [ -0.08307116, -0.35959044, 1.015067... \n", - "1 0 [ -0.22862588, -0.49313605, 1.284232... 
\n", - "2 0 [ 0.028480446, -0.17874268, 1.54320... \n", - "3 0 [ -0.46517605, -0.29836014, 1.073768... \n", - "4 0 [ -0.10730826, -0.3372096, 1.226979... \n", + "0 0 [ -0.08307081, -0.35959032, 1.015068... \n", + "1 0 [ -0.22862603, -0.49313632, 1.28423... \n", + "2 0 [ 0.028480662, -0.17874284, 1.54320... \n", + "3 0 [ -0.4651753, -0.29836023, 1.073767... \n", + "4 0 [ -0.10730811, -0.33720982, 1.226979... \n", ".. ... ... \n", - "684 0 [ -0.12806588, -0.002324244, 0.6781316... \n", - "685 0 [ 0.30534068, -0.52625746, 0.8281702... \n", - "686 1 [ -0.04873929, -0.3379735, -0.0583514... \n", - "687 0 [ -0.0052893925, -0.29743084, 0.716173... \n", - "688 0 [ -0.5030238, 0.36253875, 0.731493... \n", + "684 0 [ -0.1280663, -0.0023243837, 0.678132... \n", + "685 0 [ 0.3053407, -0.52625775, 0.8281702... \n", + "686 1 [ -0.048738778, -0.33797324, -0.0583509... \n", + "687 0 [ -0.005289644, -0.29743072, 0.716173... \n", + "688 0 [ -0.50302404, 0.36253828, 0.7314933... \n", "\n", "[689 rows x 11 columns]" ] @@ -1982,7 +1982,7 @@ "metadata": {}, "outputs": [], "source": [ - "SHRINK_EMBEDDINGS = True\n", + "SHRINK_EMBEDDINGS = False\n", "PROJECTION_DIMS = 256\n", "RANDOM_SEED=42\n", "\n", @@ -2011,7 +2011,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f61f03eedf5b43cab24475ac398fa9a8", + "model_id": "733bd98d8a8f4959b5668020f1984a3c", "version_major": 2, "version_minor": 0 }, @@ -2032,7 +2032,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1ecf920613fe421ba4b444c386bf3254", + "model_id": "2e47f857a39a44d7ac5a3f34f60494cb", "version_major": 2, "version_minor": 0 }, @@ -2053,7 +2053,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1ba49f8191854746a0e61dfc552da322", + "model_id": "31f95cf00b394566a13997459a76db17", "version_major": 2, "version_minor": 0 }, @@ -2111,7 +2111,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.06799730722665887, 2.664292496984028...\n", + " [ -0.17669655, -0.3989963, 
0.908887...\n", " \n", " \n", " 1\n", @@ -2125,7 +2125,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.7262477871614377, 2.600414199244437...\n", + " [ -0.3855382, -0.50232756, 1.173232...\n", " \n", " \n", " 2\n", @@ -2139,7 +2139,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.09688767345391286, 2.951251600481012...\n", + " [ -0.11718995, -0.12701154, 1.38969...\n", " \n", " \n", " 3\n", @@ -2153,7 +2153,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.15686700764492822, 2.585945891391126...\n", + " [ -0.39025685, -0.25043246, 1.074507...\n", " \n", " \n", " 4\n", @@ -2167,7 +2167,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.13613133440041497, 2.820193808843421...\n", + " [ -0.27732754, -0.26160136, 1.078761...\n", " \n", " \n", " ...\n", @@ -2195,7 +2195,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.643701220752026, 1.257602895023083...\n", + " [ 0.015393024, -0.040650737, 1.001185...\n", " \n", " \n", " 2155\n", @@ -2209,7 +2209,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.6270134925747186, 1.351350566308111...\n", + " [ 0.075038865, 0.014400693, 1.043231...\n", " \n", " \n", " 2156\n", @@ -2223,7 +2223,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.4468312387950375, 1.38293831378890...\n", + " [ -0.085796565, 0.05905571, 1.114640...\n", " \n", " \n", " 2157\n", @@ -2237,7 +2237,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.6746394773845812, 1.611593948841774...\n", + " [ 0.0113782445, -0.26387203, 0.881803...\n", " \n", " \n", " 2158\n", @@ -2251,7 +2251,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.7103215591248637, 1.323178591000971...\n", + " [ 0.48513305, 1.5709875, 0.592935...\n", " \n", " \n", "\n", @@ -2286,17 +2286,17 @@ "2158 True O O 0 \n", "\n", " embedding \n", - "0 [ -0.06799730722665887, 2.664292496984028... \n", - "1 [ -0.7262477871614377, 2.600414199244437... \n", - "2 [ -0.09688767345391286, 2.951251600481012... \n", - "3 [ -0.15686700764492822, 2.585945891391126... \n", - "4 [ -0.13613133440041497, 2.820193808843421... \n", + "0 [ -0.17669655, -0.3989963, 0.908887... 
\n", + "1 [ -0.3855382, -0.50232756, 1.173232... \n", + "2 [ -0.11718995, -0.12701154, 1.38969... \n", + "3 [ -0.39025685, -0.25043246, 1.074507... \n", + "4 [ -0.27732754, -0.26160136, 1.078761... \n", "... ... \n", - "2154 [ -1.643701220752026, 1.257602895023083... \n", - "2155 [ -1.6270134925747186, 1.351350566308111... \n", - "2156 [ -1.4468312387950375, 1.38293831378890... \n", - "2157 [ -1.6746394773845812, 1.611593948841774... \n", - "2158 [ -1.7103215591248637, 1.323178591000971... \n", + "2154 [ 0.015393024, -0.040650737, 1.001185... \n", + "2155 [ 0.075038865, 0.014400693, 1.043231... \n", + "2156 [ -0.085796565, 0.05905571, 1.114640... \n", + "2157 [ 0.0113782445, -0.26387203, 0.881803... \n", + "2158 [ 0.48513305, 1.5709875, 0.592935... \n", "\n", "[2159 rows x 11 columns]" ] @@ -2383,7 +2383,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.1311553691542877, 2.76648593421354...\n", + " [ -0.098505504, -0.4050192, 0.742888...\n", " \n", " \n", " 1\n", @@ -2399,7 +2399,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.2222068473266146, 2.527425640627292...\n", + " [ -0.057021566, -0.48112106, 0.989868...\n", " \n", " \n", " 2\n", @@ -2415,7 +2415,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.7579851055799667, 2.73181597486195...\n", + " [ -0.04824192, -0.2532998, 1.16719...\n", " \n", " \n", " 3\n", @@ -2431,7 +2431,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.6730784947110267, 2.38562714803554...\n", + " [ -0.26682985, -0.31008705, 1.00747...\n", " \n", " \n", " 4\n", @@ -2447,7 +2447,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.5528018738380444, 2.76605626434104...\n", + " [ -0.22296886, -0.21308525, 0.933102...\n", " \n", " \n", " ...\n", @@ -2479,7 +2479,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.7699805568359452, 1.740577378824614...\n", + " [ -0.02817309, -0.08062352, 0.9804888...\n", " \n", " \n", " 416537\n", @@ -2495,7 +2495,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -2.217042553207956, 1.188014284432918...\n", + " [ 0.118173525, -0.07008511, 0.865484...\n", " \n", " \n", " 
416538\n", @@ -2511,7 +2511,7 @@ " PER\n", " B-PER\n", " 4\n", - " [ 0.17265078748216925, 2.21287031816488...\n", + " [ -0.35689434, 0.31400475, 1.573854...\n", " \n", " \n", " 416539\n", @@ -2527,7 +2527,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -2.022874581969901, 1.548629892512103...\n", + " [ -0.18957116, -0.2458116, 0.66257...\n", " \n", " \n", " 416540\n", @@ -2543,7 +2543,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -2.196537811154486, 2.14273333538158...\n", + " [ -0.4468915, -0.31665248, 0.779688...\n", " \n", " \n", "\n", @@ -2591,17 +2591,17 @@ "416540 O 0 \n", "\n", " embedding \n", - "0 [ -1.1311553691542877, 2.76648593421354... \n", - "1 [ -1.2222068473266146, 2.527425640627292... \n", - "2 [ -0.7579851055799667, 2.73181597486195... \n", - "3 [ -0.6730784947110267, 2.38562714803554... \n", - "4 [ -0.5528018738380444, 2.76605626434104... \n", + "0 [ -0.098505504, -0.4050192, 0.742888... \n", + "1 [ -0.057021566, -0.48112106, 0.989868... \n", + "2 [ -0.04824192, -0.2532998, 1.16719... \n", + "3 [ -0.26682985, -0.31008705, 1.00747... \n", + "4 [ -0.22296886, -0.21308525, 0.933102... \n", "... ... \n", - "416536 [ -1.7699805568359452, 1.740577378824614... \n", - "416537 [ -2.217042553207956, 1.188014284432918... \n", - "416538 [ 0.17265078748216925, 2.21287031816488... \n", - "416539 [ -2.022874581969901, 1.548629892512103... \n", - "416540 [ -2.196537811154486, 2.14273333538158... \n", + "416536 [ -0.02817309, -0.08062352, 0.9804888... \n", + "416537 [ 0.118173525, -0.07008511, 0.865484... \n", + "416538 [ -0.35689434, 0.31400475, 1.573854... \n", + "416539 [ -0.18957116, -0.2458116, 0.66257... \n", + "416540 [ -0.4468915, -0.31665248, 0.779688... 
\n", "\n", "[416541 rows x 13 columns]" ] @@ -2702,7 +2702,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.1311553691542877, 2.76648593421354...\n", + " [ -0.098505504, -0.4050192, 0.742888...\n", " \n", " \n", " 1\n", @@ -2717,7 +2717,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.2222068473266146, 2.527425640627292...\n", + " [ -0.057021566, -0.48112106, 0.989868...\n", " \n", " \n", " 2\n", @@ -2732,7 +2732,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.7579851055799667, 2.73181597486195...\n", + " [ -0.04824192, -0.2532998, 1.16719...\n", " \n", " \n", " 3\n", @@ -2747,7 +2747,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.6730784947110267, 2.38562714803554...\n", + " [ -0.26682985, -0.31008705, 1.00747...\n", " \n", " \n", " 4\n", @@ -2762,7 +2762,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.5528018738380444, 2.76605626434104...\n", + " [ -0.22296886, -0.21308525, 0.933102...\n", " \n", " \n", " ...\n", @@ -2792,7 +2792,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.7699805568359452, 1.740577378824614...\n", + " [ -0.02817309, -0.08062352, 0.9804888...\n", " \n", " \n", " 416537\n", @@ -2807,7 +2807,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -2.217042553207956, 1.188014284432918...\n", + " [ 0.118173525, -0.07008511, 0.865484...\n", " \n", " \n", " 416538\n", @@ -2822,7 +2822,7 @@ " PER\n", " B-PER\n", " 4\n", - " [ 0.17265078748216925, 2.21287031816488...\n", + " [ -0.35689434, 0.31400475, 1.573854...\n", " \n", " \n", " 416539\n", @@ -2837,7 +2837,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -2.022874581969901, 1.548629892512103...\n", + " [ -0.18957116, -0.2458116, 0.66257...\n", " \n", " \n", " 416540\n", @@ -2852,7 +2852,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -2.196537811154486, 2.14273333538158...\n", + " [ -0.4468915, -0.31665248, 0.779688...\n", " \n", " \n", "\n", @@ -2887,17 +2887,17 @@ "416540 True O O 0 \n", "\n", " embedding \n", - "0 [ -1.1311553691542877, 2.76648593421354... \n", - "1 [ -1.2222068473266146, 2.527425640627292... \n", - "2 [ -0.7579851055799667, 2.73181597486195... 
\n", - "3 [ -0.6730784947110267, 2.38562714803554... \n", - "4 [ -0.5528018738380444, 2.76605626434104... \n", + "0 [ -0.098505504, -0.4050192, 0.742888... \n", + "1 [ -0.057021566, -0.48112106, 0.989868... \n", + "2 [ -0.04824192, -0.2532998, 1.16719... \n", + "3 [ -0.26682985, -0.31008705, 1.00747... \n", + "4 [ -0.22296886, -0.21308525, 0.933102... \n", "... ... \n", - "416536 [ -1.7699805568359452, 1.740577378824614... \n", - "416537 [ -2.217042553207956, 1.188014284432918... \n", - "416538 [ 0.17265078748216925, 2.21287031816488... \n", - "416539 [ -2.022874581969901, 1.548629892512103... \n", - "416540 [ -2.196537811154486, 2.14273333538158... \n", + "416536 [ -0.02817309, -0.08062352, 0.9804888... \n", + "416537 [ 0.118173525, -0.07008511, 0.865484... \n", + "416538 [ -0.35689434, 0.31400475, 1.573854... \n", + "416539 [ -0.18957116, -0.2458116, 0.66257... \n", + "416540 [ -0.4468915, -0.31665248, 0.779688... \n", "\n", "[416541 rows x 12 columns]" ] @@ -2978,7 +2978,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.1311553691542877, 2.76648593421354...\n", + " [ -0.098505504, -0.4050192, 0.742888...\n", " \n", " \n", " 1\n", @@ -2993,7 +2993,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.2222068473266146, 2.527425640627292...\n", + " [ -0.057021566, -0.48112106, 0.989868...\n", " \n", " \n", " 2\n", @@ -3008,7 +3008,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.7579851055799667, 2.73181597486195...\n", + " [ -0.04824192, -0.2532998, 1.16719...\n", " \n", " \n", " 3\n", @@ -3023,7 +3023,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.6730784947110267, 2.38562714803554...\n", + " [ -0.26682985, -0.31008705, 1.00747...\n", " \n", " \n", " 4\n", @@ -3038,7 +3038,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.5528018738380444, 2.76605626434104...\n", + " [ -0.22296886, -0.21308525, 0.933102...\n", " \n", " \n", " ...\n", @@ -3068,7 +3068,7 @@ " ORG\n", " B-ORG\n", " 3\n", - " [ -1.3644899324386204, 0.1387769900935160...\n", + " [ 0.7556371, -0.91891253, -0.1403036...\n", " \n", " \n", " 
281105\n", @@ -3083,7 +3083,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.4544672314078606, 1.4293731057006...\n", + " [ -0.11528473, -0.44492027, 0.4715562...\n", " \n", " \n", " 281106\n", @@ -3098,7 +3098,7 @@ " ORG\n", " B-ORG\n", " 3\n", - " [ -1.0318755443110903, 0.4064806114217064...\n", + " [ 0.45602208, -0.8970848, 0.0678616...\n", " \n", " \n", " 281107\n", @@ -3113,7 +3113,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.2597896004962865, 1.395942742925384...\n", + " [ -0.19713743, -0.5427194, 0.294020...\n", " \n", " \n", " 281108\n", @@ -3128,7 +3128,7 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.6741569815858808, 1.901864765138888...\n", + " [ -0.57650733, -0.42160645, 0.994703...\n", " \n", " \n", "\n", @@ -3163,17 +3163,17 @@ "281108 True O O 0 \n", "\n", " embedding \n", - "0 [ -1.1311553691542877, 2.76648593421354... \n", - "1 [ -1.2222068473266146, 2.527425640627292... \n", - "2 [ -0.7579851055799667, 2.73181597486195... \n", - "3 [ -0.6730784947110267, 2.38562714803554... \n", - "4 [ -0.5528018738380444, 2.76605626434104... \n", + "0 [ -0.098505504, -0.4050192, 0.742888... \n", + "1 [ -0.057021566, -0.48112106, 0.989868... \n", + "2 [ -0.04824192, -0.2532998, 1.16719... \n", + "3 [ -0.26682985, -0.31008705, 1.00747... \n", + "4 [ -0.22296886, -0.21308525, 0.933102... \n", "... ... \n", - "281104 [ -1.3644899324386204, 0.1387769900935160... \n", - "281105 [ -1.4544672314078606, 1.4293731057006... \n", - "281106 [ -1.0318755443110903, 0.4064806114217064... \n", - "281107 [ -1.2597896004962865, 1.395942742925384... \n", - "281108 [ -1.6741569815858808, 1.901864765138888... \n", + "281104 [ 0.7556371, -0.91891253, -0.1403036... \n", + "281105 [ -0.11528473, -0.44492027, 0.4715562... \n", + "281106 [ 0.45602208, -0.8970848, 0.0678616... \n", + "281107 [ -0.19713743, -0.5427194, 0.294020... \n", + "281108 [ -0.57650733, -0.42160645, 0.994703... 
\n", "\n", "[281109 rows x 12 columns]" ] @@ -3195,29 +3195,127 @@ "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n" + "RUNNING THE L-BFGS-B CODE\n", + "\n", + " * * *\n", + "\n", + "Machine precision = 2.220D-16\n", + " N = 6921 M = 10\n", + "\n", + "At X0 0 variables are exactly at the bounds\n", + "\n", + "At iterate 0 f= 6.17660D+05 |proj g|= 4.23293D+05\n" ] }, { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "CPU times: user 46min 3s, sys: 4min 23s, total: 50min 27s\n", - "Wall time: 6min 22s\n" + " This problem is unconstrained.\n" ] }, { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 6.4min finished\n" + "\n", + "At iterate 50 f= 1.22005D+04 |proj g|= 2.48275D+02\n", + "\n", + "At iterate 100 f= 8.87639D+03 |proj g|= 1.72205D+02\n", + "\n", + "At iterate 150 f= 8.07946D+03 |proj g|= 1.28633D+02\n", + "\n", + "At iterate 200 f= 7.87840D+03 |proj g|= 6.20068D+01\n", + "\n", + "At iterate 250 f= 7.81730D+03 |proj g|= 9.11741D+00\n", + "\n", + "At iterate 300 f= 7.80144D+03 |proj g|= 6.86435D+00\n", + "\n", + "At iterate 350 f= 7.79623D+03 |proj g|= 7.21843D+00\n", + "\n", + "At iterate 400 f= 7.79451D+03 |proj g|= 5.64213D+00\n", + "\n", + "At iterate 450 f= 7.79356D+03 |proj g|= 2.47884D+00\n", + "\n", + "At iterate 500 f= 7.79273D+03 |proj g|= 2.32130D+00\n", + "\n", + "At iterate 550 f= 7.79141D+03 |proj g|= 1.03513D+01\n", + "\n", + "At iterate 600 f= 7.78944D+03 |proj g|= 4.39763D+00\n", + "\n", + "At iterate 650 f= 7.78798D+03 |proj g|= 2.72198D+00\n", + "\n", + "At iterate 700 f= 7.78721D+03 |proj g|= 2.49312D+00\n", + "\n", + "At iterate 750 f= 7.78691D+03 |proj g|= 2.09049D+00\n", + "\n", + "At iterate 800 f= 7.78678D+03 |proj g|= 1.56225D+00\n", + "\n", + "At iterate 850 f= 7.78669D+03 |proj g|= 
9.61272D-01\n", + "\n", + "At iterate 900 f= 7.78660D+03 |proj g|= 1.88970D+00\n", + "\n", + "At iterate 950 f= 7.78644D+03 |proj g|= 1.39468D+00\n", + "\n", + "At iterate 1000 f= 7.78615D+03 |proj g|= 1.56165D+00\n", + "\n", + "At iterate 1050 f= 7.78593D+03 |proj g|= 1.81700D+00\n", + "\n", + "At iterate 1100 f= 7.78581D+03 |proj g|= 1.11273D+00\n", + "\n", + "At iterate 1150 f= 7.78577D+03 |proj g|= 4.10524D-01\n", + "\n", + "At iterate 1200 f= 7.78575D+03 |proj g|= 3.49336D-01\n", + "\n", + "At iterate 1250 f= 7.78574D+03 |proj g|= 8.20185D-01\n", + "\n", + "At iterate 1300 f= 7.78571D+03 |proj g|= 9.94495D-01\n", + "\n", + "At iterate 1350 f= 7.78567D+03 |proj g|= 7.14421D-01\n", + "\n", + "At iterate 1400 f= 7.78563D+03 |proj g|= 3.46513D-01\n", + "\n", + "At iterate 1450 f= 7.78561D+03 |proj g|= 1.15784D+00\n", + "\n", + "At iterate 1500 f= 7.78559D+03 |proj g|= 5.66811D-01\n", + "\n", + "At iterate 1550 f= 7.78559D+03 |proj g|= 1.43156D-01\n", + "\n", + "At iterate 1600 f= 7.78558D+03 |proj g|= 1.60595D-01\n", + "\n", + " * * *\n", + "\n", + "Tit = total number of iterations\n", + "Tnf = total number of function evaluations\n", + "Tnint = total number of segments explored during Cauchy searches\n", + "Skip = number of BFGS updates skipped\n", + "Nact = number of active bounds at final generalized Cauchy point\n", + "Projg = norm of the final projected gradient\n", + "F = final function value\n", + "\n", + " * * *\n", + "\n", + " N Tit Tnf Tnint Skip Nact Projg F\n", + " 6921 1604 1694 1 0 0 4.829D-01 7.786D+03\n", + " F = 7785.5829997825367 \n", + "\n", + "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH \n", + "CPU times: user 1h 34min 15s, sys: 6min 41s, total: 1h 40min 56s\n", + "Wall time: 12min 44s\n" ] }, { "data": { + "text/html": [ + "
Pipeline(steps=[('mlogreg',\n",
+       "                 LogisticRegression(C=0.1, max_iter=10000,\n",
+       "                                    multi_class='multinomial', verbose=1))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], "text/plain": [ "Pipeline(steps=[('mlogreg',\n", " LogisticRegression(C=0.1, max_iter=10000,\n", @@ -3289,7 +3387,8 @@ " and `predicted_class_pr`.\n", " \"\"\"\n", " result_df = df.copy()\n", - " class_pr = tp.TensorArray(predictor.predict_proba(result_df[\"embedding\"]))\n", + " embeddings = result_df[\"embedding\"].to_numpy()\n", + " class_pr = tp.TensorArray(predictor.predict_proba(embeddings))\n", " result_df[\"predicted_id\"] = np.argmax(class_pr, axis=1)\n", " result_df[\"predicted_class\"] = [id_to_class[i]\n", " for i in result_df[\"predicted_id\"].values]\n", @@ -3359,12 +3458,12 @@ " <NA>\n", " O\n", " 0\n", - " [ 0.07419002371155237, 2.81491930509171...\n", + " [ -0.19626583, -0.450937, 0.6775361...\n", " 0\n", " O\n", " O\n", " None\n", - " [ 0.9997307514975134, 5.294607015948672e-0...\n", + " [ 0.9994774788863705, 1.9985127298723906e-0...\n", " \n", " \n", " 351002\n", @@ -3379,12 +3478,12 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.7553124891222318, 2.712434591871051...\n", + " [ -0.3187211, -0.5074784, 1.046454...\n", " 0\n", " O\n", " O\n", " None\n", - " [ 0.9980035154999108, 1.533050022629027e-0...\n", + " [ 0.9992964240340214, 3.7581023374440964e-0...\n", " \n", " \n", " 351003\n", @@ -3399,12 +3498,12 @@ " <NA>\n", " O\n", " 0\n", - " [ 0.11465290957193339, 3.11397875179331...\n", + " [ -0.080538824, -0.2477481, 1.356255...\n", " 0\n", " O\n", " O\n", " None\n", - " [ 0.9969301297651303, 0.000670705720761996...\n", + " [ 0.998973288221842, 0.0004299715907382311...\n", " \n", " \n", " 351004\n", @@ -3419,12 +3518,12 @@ " <NA>\n", " O\n", " 0\n", - " [ -0.14387838512527962, 2.9257680850885...\n", + " [ -0.6878579, -0.30290246, 0.8842714...\n", " 0\n", " O\n", " O\n", " None\n", - " [ 0.9990384089044105, 8.475109949412816e-0...\n", + " [ 0.9983217119367633, 4.888114850946988e-0...\n", " \n", " \n", " 351005\n", @@ -3439,12 +3538,12 @@ " <NA>\n", " O\n", " 0\n", - " [ 0.08375985078305932, 3.067161861783276...\n", + " [ -0.2963228, -0.23313177, 
0.93988...\n", " 0\n", " O\n", " O\n", " None\n", - " [ 0.9996995206821001, 6.044135027078061e-0...\n", + " [ 0.9999185106741023, 8.938753477308423e-0...\n", " \n", " \n", "\n", @@ -3466,11 +3565,11 @@ "351005 False O O 0 \n", "\n", " embedding predicted_id \\\n", - "351001 [ 0.07419002371155237, 2.81491930509171... 0 \n", - "351002 [ -0.7553124891222318, 2.712434591871051... 0 \n", - "351003 [ 0.11465290957193339, 3.11397875179331... 0 \n", - "351004 [ -0.14387838512527962, 2.9257680850885... 0 \n", - "351005 [ 0.08375985078305932, 3.067161861783276... 0 \n", + "351001 [ -0.19626583, -0.450937, 0.6775361... 0 \n", + "351002 [ -0.3187211, -0.5074784, 1.046454... 0 \n", + "351003 [ -0.080538824, -0.2477481, 1.356255... 0 \n", + "351004 [ -0.6878579, -0.30290246, 0.8842714... 0 \n", + "351005 [ -0.2963228, -0.23313177, 0.93988... 0 \n", "\n", " predicted_class predicted_iob predicted_type \\\n", "351001 O O None \n", @@ -3480,11 +3579,11 @@ "351005 O O None \n", "\n", " predicted_class_pr \n", - "351001 [ 0.9997307514975134, 5.294607015948672e-0... \n", - "351002 [ 0.9980035154999108, 1.533050022629027e-0... \n", - "351003 [ 0.9969301297651303, 0.000670705720761996... \n", - "351004 [ 0.9990384089044105, 8.475109949412816e-0... \n", - "351005 [ 0.9996995206821001, 6.044135027078061e-0... " + "351001 [ 0.9994774788863705, 1.9985127298723906e-0... \n", + "351002 [ 0.9992964240340214, 3.7581023374440964e-0... \n", + "351003 [ 0.998973288221842, 0.0004299715907382311... \n", + "351004 [ 0.9983217119367633, 4.888114850946988e-0... \n", + "351005 [ 0.9999185106741023, 8.938753477308423e-0... 
" ] }, "execution_count": 25, @@ -3557,12 +3656,12 @@ " PER\n", " I-PER\n", " 8\n", - " [ 0.06028430363940268, 2.833449942439...\n", - " 5\n", - " I-LOC\n", + " [ -0.21029201, -0.8535674, 0.0002756594...\n", + " 6\n", + " I-MISC\n", " I\n", - " LOC\n", - " [ 0.05335241986368567, 0.01558548709678581...\n", + " MISC\n", + " [ 0.0010111308810159478, 1.6209660863726316e-0...\n", " \n", " \n", " 351042\n", @@ -3577,12 +3676,12 @@ " PER\n", " I-PER\n", " 8\n", - " [ 0.011815326065059528, 2.4804891126405...\n", - " 5\n", - " I-LOC\n", + " [ -0.23205486, -0.9290767, 0.3889118...\n", + " 6\n", + " I-MISC\n", " I\n", - " LOC\n", - " [ 0.26071159739023836, 0.0810894424222212...\n", + " MISC\n", + " [ 0.012755027203264928, 0.00554094580945546...\n", " \n", " \n", " 351043\n", @@ -3597,12 +3696,12 @@ " PER\n", " I-PER\n", " 8\n", - " [ 0.1896747233694964, 2.0841390182245...\n", + " [ 0.36844134, -0.68091154, -0.1059106...\n", " 5\n", " I-LOC\n", " I\n", " LOC\n", - " [ 0.0008087046569282995, 0.01409121178547858...\n", + " [ 0.008349822538261149, 0.180904633782168...\n", " \n", " \n", " 351044\n", @@ -3617,12 +3716,12 @@ " PER\n", " I-PER\n", " 8\n", - " [ -0.08919079934068028, 2.673042893674...\n", - " 5\n", - " I-LOC\n", + " [ -0.30131084, -0.6546019, -0.1726912...\n", + " 8\n", + " I-PER\n", " I\n", - " LOC\n", - " [ 0.01641422864584388, 0.01922057245520043...\n", + " PER\n", + " [ 0.013398092974719904, 0.000889872066127380...\n", " \n", " \n", " 351045\n", @@ -3637,12 +3736,12 @@ " PER\n", " I-PER\n", " 8\n", - " [ -0.5675588015558329, 2.1915603140880...\n", + " [ -0.1611614, -0.69891113, 0.2342468...\n", " 5\n", " I-LOC\n", " I\n", " LOC\n", - " [ 0.06287713949432004, 0.05853431405140322...\n", + " [ 0.014927046511081343, 0.0209250472885050...\n", " \n", " \n", " 351046\n", @@ -3657,12 +3756,12 @@ " LOC\n", " B-LOC\n", " 1\n", - " [ -0.025756110031628202, 2.4176568055402...\n", + " [ -0.058567554, -0.79558676, 0.3360603...\n", " 1\n", " B-LOC\n", " B\n", " LOC\n", - " [ 
0.002164163302129627, 0.533655982403914...\n", + " [ 0.027281135850703336, 0.532249166723370...\n", " \n", " \n", " 351047\n", @@ -3677,12 +3776,12 @@ " LOC\n", " I-LOC\n", " 5\n", - " [ -0.8143908150954474, 2.2432229840625...\n", + " [ 0.2037595, -0.73730904, -0.0888521...\n", " 5\n", " I-LOC\n", " I\n", " LOC\n", - " [ 0.40170332018342714, 0.01464842540432879...\n", + " [ 0.22512840995098554, 0.00379439656874946...\n", " \n", " \n", " 351048\n", @@ -3697,12 +3796,12 @@ " LOC\n", " I-LOC\n", " 5\n", - " [ -0.7613811626814251, 2.1040792203968...\n", + " [ -0.10341229, -0.33681834, 0.1738456...\n", " 5\n", " I-LOC\n", " I\n", " LOC\n", - " [ 0.04547783920785417, 0.370807027150105...\n", + " [ 0.04472568023866835, 0.436126151622446...\n", " \n", " \n", " 351049\n", @@ -3717,12 +3816,12 @@ " LOC\n", " I-LOC\n", " 5\n", - " [ -0.5023455357742641, 2.467216928215...\n", + " [ -0.4054268, -0.6516522, 0.2469...\n", " 5\n", " I-LOC\n", " I\n", " LOC\n", - " [ 0.0014782178334539389, 0.01311886606422394...\n", + " [ 0.0009405393288526446, 0.00244544190700176...\n", " \n", " \n", " 351050\n", @@ -3737,12 +3836,12 @@ " <NA>\n", " O\n", " 0\n", - " [ -1.0898376005782766, 2.4839734026886...\n", + " [ -0.16829254, -0.6475861, 0.8149025...\n", " 0\n", " O\n", " O\n", " None\n", - " [ 0.9997009893806189, 3.928979951597114e-0...\n", + " [ 0.9999736550716568, 5.7005018158771435e-0...\n", " \n", " \n", "\n", @@ -3774,22 +3873,22 @@ "351050 False O O 0 \n", "\n", " embedding predicted_id \\\n", - "351041 [ 0.06028430363940268, 2.833449942439... 5 \n", - "351042 [ 0.011815326065059528, 2.4804891126405... 5 \n", - "351043 [ 0.1896747233694964, 2.0841390182245... 5 \n", - "351044 [ -0.08919079934068028, 2.673042893674... 5 \n", - "351045 [ -0.5675588015558329, 2.1915603140880... 5 \n", - "351046 [ -0.025756110031628202, 2.4176568055402... 1 \n", - "351047 [ -0.8143908150954474, 2.2432229840625... 5 \n", - "351048 [ -0.7613811626814251, 2.1040792203968... 
5 \n", - "351049 [ -0.5023455357742641, 2.467216928215... 5 \n", - "351050 [ -1.0898376005782766, 2.4839734026886... 0 \n", + "351041 [ -0.21029201, -0.8535674, 0.0002756594... 6 \n", + "351042 [ -0.23205486, -0.9290767, 0.3889118... 6 \n", + "351043 [ 0.36844134, -0.68091154, -0.1059106... 5 \n", + "351044 [ -0.30131084, -0.6546019, -0.1726912... 8 \n", + "351045 [ -0.1611614, -0.69891113, 0.2342468... 5 \n", + "351046 [ -0.058567554, -0.79558676, 0.3360603... 1 \n", + "351047 [ 0.2037595, -0.73730904, -0.0888521... 5 \n", + "351048 [ -0.10341229, -0.33681834, 0.1738456... 5 \n", + "351049 [ -0.4054268, -0.6516522, 0.2469... 5 \n", + "351050 [ -0.16829254, -0.6475861, 0.8149025... 0 \n", "\n", " predicted_class predicted_iob predicted_type \\\n", - "351041 I-LOC I LOC \n", - "351042 I-LOC I LOC \n", + "351041 I-MISC I MISC \n", + "351042 I-MISC I MISC \n", "351043 I-LOC I LOC \n", - "351044 I-LOC I LOC \n", + "351044 I-PER I PER \n", "351045 I-LOC I LOC \n", "351046 B-LOC B LOC \n", "351047 I-LOC I LOC \n", @@ -3798,16 +3897,16 @@ "351050 O O None \n", "\n", " predicted_class_pr \n", - "351041 [ 0.05335241986368567, 0.01558548709678581... \n", - "351042 [ 0.26071159739023836, 0.0810894424222212... \n", - "351043 [ 0.0008087046569282995, 0.01409121178547858... \n", - "351044 [ 0.01641422864584388, 0.01922057245520043... \n", - "351045 [ 0.06287713949432004, 0.05853431405140322... \n", - "351046 [ 0.002164163302129627, 0.533655982403914... \n", - "351047 [ 0.40170332018342714, 0.01464842540432879... \n", - "351048 [ 0.04547783920785417, 0.370807027150105... \n", - "351049 [ 0.0014782178334539389, 0.01311886606422394... \n", - "351050 [ 0.9997009893806189, 3.928979951597114e-0... " + "351041 [ 0.0010111308810159478, 1.6209660863726316e-0... \n", + "351042 [ 0.012755027203264928, 0.00554094580945546... \n", + "351043 [ 0.008349822538261149, 0.180904633782168... \n", + "351044 [ 0.013398092974719904, 0.000889872066127380... 
\n", + "351045 [ 0.014927046511081343, 0.0209250472885050... \n", + "351046 [ 0.027281135850703336, 0.532249166723370... \n", + "351047 [ 0.22512840995098554, 0.00379439656874946... \n", + "351048 [ 0.04472568023866835, 0.436126151622446... \n", + "351049 [ 0.0009405393288526446, 0.00244544190700176... \n", + "351050 [ 0.9999736550716568, 5.7005018158771435e-0... " ] }, "execution_count": 26, @@ -3877,7 +3976,7 @@ " I\n", " PER\n", " I\n", - " LOC\n", + " MISC\n", " \n", " \n", " 41\n", @@ -3886,7 +3985,7 @@ " I\n", " PER\n", " I\n", - " LOC\n", + " MISC\n", " \n", " \n", " 42\n", @@ -3904,7 +4003,7 @@ " I\n", " PER\n", " I\n", - " LOC\n", + " PER\n", " \n", " \n", " 44\n", @@ -4078,10 +4177,10 @@ "59 59 [124, 129): 'began' O O \n", "\n", " predicted_type \n", - "40 LOC \n", - "41 LOC \n", + "40 MISC \n", + "41 MISC \n", "42 LOC \n", - "43 LOC \n", + "43 PER \n", "44 LOC \n", "45 LOC \n", "46 LOC \n", @@ -4178,12 +4277,12 @@ " \n", " 2\n", " [40, 45): 'CHINA'\n", - " LOC\n", + " ORG\n", " \n", " \n", " 3\n", " [66, 77): 'Nadim Ladki'\n", - " PER\n", + " LOC\n", " \n", " \n", " 4\n", @@ -4198,8 +4297,8 @@ " span ent_type\n", "0 [19, 24): 'JAPAN' PER\n", "1 [29, 34): 'LUCKY' LOC\n", - "2 [40, 45): 'CHINA' LOC\n", - "3 [66, 77): 'Nadim Ladki' PER\n", + "2 [40, 45): 'CHINA' ORG\n", + "3 [66, 77): 'Nadim Ladki' LOC\n", "4 [78, 84): 'AL-AIN' LOC" ] }, @@ -4261,12 +4360,12 @@ " 0\n", " test\n", " 0\n", - " 42\n", - " 46\n", + " 41\n", + " 47\n", " 45\n", - " 0.913043\n", - " 0.933333\n", - " 0.923077\n", + " 0.872340\n", + " 0.911111\n", + " 0.891304\n", " \n", " \n", " 1\n", @@ -4294,23 +4393,23 @@ " 3\n", " test\n", " 3\n", - " 41\n", - " 45\n", + " 42\n", " 44\n", - " 0.911111\n", - " 0.931818\n", - " 0.921348\n", + " 44\n", + " 0.954545\n", + " 0.954545\n", + " 0.954545\n", " \n", " \n", " 4\n", " test\n", " 4\n", - " 17\n", + " 18\n", " 19\n", " 19\n", - " 0.894737\n", - " 0.894737\n", - " 0.894737\n", + " 0.947368\n", + " 0.947368\n", + " 0.947368\n", " \n", " \n", 
" ...\n", @@ -4327,12 +4426,12 @@ " 226\n", " test\n", " 226\n", + " 6\n", " 7\n", " 7\n", - " 7\n", - " 1.000000\n", - " 1.000000\n", - " 1.000000\n", + " 0.857143\n", + " 0.857143\n", + " 0.857143\n", " \n", " \n", " 227\n", @@ -4350,33 +4449,33 @@ " test\n", " 228\n", " 24\n", - " 25\n", + " 28\n", " 27\n", - " 0.960000\n", + " 0.857143\n", " 0.888889\n", - " 0.923077\n", + " 0.872727\n", " \n", " \n", " 229\n", " test\n", " 229\n", - " 26\n", + " 25\n", " 27\n", " 27\n", - " 0.962963\n", - " 0.962963\n", - " 0.962963\n", + " 0.925926\n", + " 0.925926\n", + " 0.925926\n", " \n", " \n", " 230\n", " test\n", " 230\n", - " 24\n", + " 25\n", " 27\n", " 28\n", - " 0.888889\n", - " 0.857143\n", - " 0.872727\n", + " 0.925926\n", + " 0.892857\n", + " 0.909091\n", " \n", " \n", "\n", @@ -4385,30 +4484,30 @@ ], "text/plain": [ " fold doc_num num_true_positives num_extracted num_entities \\\n", - "0 test 0 42 46 45 \n", + "0 test 0 41 47 45 \n", "1 test 1 41 42 44 \n", "2 test 2 52 54 54 \n", - "3 test 3 41 45 44 \n", - "4 test 4 17 19 19 \n", + "3 test 3 42 44 44 \n", + "4 test 4 18 19 19 \n", ".. ... ... ... ... ... \n", - "226 test 226 7 7 7 \n", + "226 test 226 6 7 7 \n", "227 test 227 18 19 21 \n", - "228 test 228 24 25 27 \n", - "229 test 229 26 27 27 \n", - "230 test 230 24 27 28 \n", + "228 test 228 24 28 27 \n", + "229 test 229 25 27 27 \n", + "230 test 230 25 27 28 \n", "\n", " precision recall F1 \n", - "0 0.913043 0.933333 0.923077 \n", + "0 0.872340 0.911111 0.891304 \n", "1 0.976190 0.931818 0.953488 \n", "2 0.962963 0.962963 0.962963 \n", - "3 0.911111 0.931818 0.921348 \n", - "4 0.894737 0.894737 0.894737 \n", + "3 0.954545 0.954545 0.954545 \n", + "4 0.947368 0.947368 0.947368 \n", ".. ... ... ... 
\n", - "226 1.000000 1.000000 1.000000 \n", + "226 0.857143 0.857143 0.857143 \n", "227 0.947368 0.857143 0.900000 \n", - "228 0.960000 0.888889 0.923077 \n", - "229 0.962963 0.962963 0.962963 \n", - "230 0.888889 0.857143 0.872727 \n", + "228 0.857143 0.888889 0.872727 \n", + "229 0.925926 0.925926 0.925926 \n", + "230 0.925926 0.892857 0.909091 \n", "\n", "[231 rows x 8 columns]" ] @@ -4432,12 +4531,12 @@ { "data": { "text/plain": [ - "{'num_true_positives': 4749,\n", + "{'num_true_positives': 4881,\n", " 'num_entities': 5648,\n", - " 'num_extracted': 5591,\n", - " 'precision': 0.8494008227508496,\n", - " 'recall': 0.8408286118980169,\n", - " 'F1': 0.8450929798024736}" + " 'num_extracted': 5620,\n", + " 'precision': 0.8685053380782918,\n", + " 'recall': 0.8641997167138811,\n", + " 'F1': 0.8663471778487754}" ] }, "execution_count": 30, @@ -5074,7 +5173,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e808850bda33440b80da729bcd7dbbb9", + "model_id": "68bcf2a129584acf9b7ce27ccd66302a", "version_major": 2, "version_minor": 0 }, @@ -5235,12 +5334,12 @@ " 0\n", " test\n", " 0\n", - " 43\n", - " 46\n", + " 42\n", + " 47\n", " 45\n", - " 0.934783\n", - " 0.955556\n", - " 0.945055\n", + " 0.893617\n", + " 0.933333\n", + " 0.913043\n", " \n", " \n", " 1\n", @@ -5279,12 +5378,12 @@ " 4\n", " test\n", " 4\n", - " 17\n", + " 18\n", " 19\n", " 19\n", - " 0.894737\n", - " 0.894737\n", - " 0.894737\n", + " 0.947368\n", + " 0.947368\n", + " 0.947368\n", " \n", " \n", " ...\n", @@ -5324,11 +5423,11 @@ " test\n", " 228\n", " 24\n", - " 25\n", " 27\n", - " 0.960000\n", + " 27\n", + " 0.888889\n", + " 0.888889\n", " 0.888889\n", - " 0.923077\n", " \n", " \n", " 229\n", @@ -5345,12 +5444,12 @@ " 230\n", " test\n", " 230\n", - " 25\n", + " 26\n", " 27\n", " 28\n", - " 0.925926\n", - " 0.892857\n", - " 0.909091\n", + " 0.962963\n", + " 0.928571\n", + " 0.945455\n", " \n", " \n", "\n", @@ -5359,30 +5458,30 @@ ], "text/plain": [ " fold doc_num 
num_true_positives num_extracted num_entities \\\n", - "0 test 0 43 46 45 \n", + "0 test 0 42 47 45 \n", "1 test 1 41 42 44 \n", "2 test 2 52 54 54 \n", "3 test 3 42 44 44 \n", - "4 test 4 17 19 19 \n", + "4 test 4 18 19 19 \n", ".. ... ... ... ... ... \n", "226 test 226 7 7 7 \n", "227 test 227 18 19 21 \n", - "228 test 228 24 25 27 \n", + "228 test 228 24 27 27 \n", "229 test 229 26 27 27 \n", - "230 test 230 25 27 28 \n", + "230 test 230 26 27 28 \n", "\n", " precision recall F1 \n", - "0 0.934783 0.955556 0.945055 \n", + "0 0.893617 0.933333 0.913043 \n", "1 0.976190 0.931818 0.953488 \n", "2 0.962963 0.962963 0.962963 \n", "3 0.954545 0.954545 0.954545 \n", - "4 0.894737 0.894737 0.894737 \n", + "4 0.947368 0.947368 0.947368 \n", ".. ... ... ... \n", "226 1.000000 1.000000 1.000000 \n", "227 0.947368 0.857143 0.900000 \n", - "228 0.960000 0.888889 0.923077 \n", + "228 0.888889 0.888889 0.888889 \n", "229 0.962963 0.962963 0.962963 \n", - "230 0.925926 0.892857 0.909091 \n", + "230 0.962963 0.928571 0.945455 \n", "\n", "[231 rows x 8 columns]" ] @@ -5406,12 +5505,12 @@ { "data": { "text/plain": [ - "{'num_true_positives': 4893,\n", + "{'num_true_positives': 4971,\n", " 'num_entities': 5648,\n", - " 'num_extracted': 5520,\n", - " 'precision': 0.8864130434782609,\n", - " 'recall': 0.8663243626062322,\n", - " 'F1': 0.8762535816618912}" + " 'num_extracted': 5587,\n", + " 'precision': 0.889744048684446,\n", + " 'recall': 0.8801345609065155,\n", + " 'F1': 0.8849132176234981}" ] }, "execution_count": 38, @@ -5449,7 +5548,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.8.17" } }, "nbformat": 4, diff --git a/notebooks/Read_conllu_Files.ipynb b/notebooks/Read_conllu_Files.ipynb index b977df8d..248e745a 100644 --- a/notebooks/Read_conllu_Files.ipynb +++ b/notebooks/Read_conllu_Files.ipynb @@ -30,8 +30,6 @@ "import sys\n", "import numpy as np\n", "import pandas as pd\n", - "import json\n", - "import 
feather\n", "import sklearn.pipeline\n", "import sklearn.linear_model\n", "import transformers\n", @@ -47,8 +45,7 @@ " raise e\n", " if \"..\" not in sys.path:\n", " sys.path.insert(0, \"..\")\n", - " import text_extensions_for_pandas as tp\n", - " " + " import text_extensions_for_pandas as tp\n" ] }, { @@ -666,7 +663,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "size is 25151\n" + "size is 25152\n" ] }, { @@ -747,48 +744,48 @@ " ...\n", " \n", " \n", - " 25146\n", + " 25147\n", " [251, 254): 'and'\n", " and\n", " CCONJ\n", - " 25150\n", + " 25151\n", " cc\n", " \n", " \n", - " 25147\n", + " 25148\n", " [255, 256): 'a'\n", " a\n", " DET\n", - " 25150\n", + " 25151\n", " det\n", " \n", " \n", - " 25148\n", + " 25149\n", " [257, 261): 'very'\n", " very\n", " ADV\n", - " 25149\n", + " 25150\n", " advmod\n", " \n", " \n", - " 25149\n", + " 25150\n", " [262, 275): 'knowledgeable'\n", " knowledgeable\n", " ADJ\n", - " 25150\n", + " 25151\n", " amod\n", " \n", " \n", - " 25150\n", + " 25151\n", " [276, 281): 'staff'\n", " staff\n", " NOUN\n", - " 25145\n", + " 25146\n", " conj\n", " \n", " \n", "\n", - "

25151 rows × 5 columns

\n", + "

25152 rows × 5 columns

\n", "" ], "text/plain": [ @@ -799,13 +796,13 @@ "3 [12, 17): 'comes' come VERB root\n", "4 [18, 22): 'this' this DET 5 det\n", "... ... ... ... ... ...\n", - "25146 [251, 254): 'and' and CCONJ 25150 cc\n", - "25147 [255, 256): 'a' a DET 25150 det\n", - "25148 [257, 261): 'very' very ADV 25149 advmod\n", - "25149 [262, 275): 'knowledgeable' knowledgeable ADJ 25150 amod\n", - "25150 [276, 281): 'staff' staff NOUN 25145 conj\n", + "25147 [251, 254): 'and' and CCONJ 25151 cc\n", + "25148 [255, 256): 'a' a DET 25151 det\n", + "25149 [257, 261): 'very' very ADV 25150 advmod\n", + "25150 [262, 275): 'knowledgeable' knowledgeable ADJ 25151 amod\n", + "25151 [276, 281): 'staff' staff NOUN 25146 conj\n", "\n", - "[25151 rows x 5 columns]" + "[25152 rows x 5 columns]" ] }, "execution_count": 5, @@ -814,7 +811,8 @@ } ], "source": [ - "# because we are concatenating our dataframes, we need to modify the \"head\" feilds to still point at their desired targets \n", + "# Because we are concatenating our dataframes, we need to modify the \"head\" \n", + "# fields to still point at their desired targets \n", "df_starts_at =0\n", "temp = conll_u_docs.copy()\n", "for df in temp:\n", @@ -822,8 +820,7 @@ " df_starts_at += df.shape[0]\n", "\n", "# Now concatenate all our documents into one big dataframe\n", - "complete_df = temp[0]\n", - "complete_df = complete_df.append(temp[1:], ignore_index=True)\n", + "complete_df = pd.concat(temp, ignore_index=True)\n", "\n", "#show the last few rows of the dataframe, select just a few columns for compactness\n", "print(f\"size is {complete_df.shape[0]}\")\n", @@ -842,7 +839,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns\n", + "CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs\n", "Wall time: 4.05 µs\n", "File written to CoNLL_u_test_inputs/conllu_database.feather\n" ] @@ -877,9 +874,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1 µs, sys: 1e+03 ns, 
total: 2 µs\n", - "Wall time: 4.77 µs\n", - "size is 25151\n" + "CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs\n", + "Wall time: 6.91 µs\n", + "size is 25152\n" ] }, { @@ -912,43 +909,43 @@ " \n", " \n", " \n", - " 25146\n", + " 25147\n", " [251, 254): 'and'\n", " and\n", " CCONJ\n", - " 25150.0\n", + " 25151.0\n", " cc\n", " \n", " \n", - " 25147\n", + " 25148\n", " [255, 256): 'a'\n", " a\n", " DET\n", - " 25150.0\n", + " 25151.0\n", " det\n", " \n", " \n", - " 25148\n", + " 25149\n", " [257, 261): 'very'\n", " very\n", " ADV\n", - " 25149.0\n", + " 25150.0\n", " advmod\n", " \n", " \n", - " 25149\n", + " 25150\n", " [262, 275): 'knowledgeable'\n", " knowledgeable\n", " ADJ\n", - " 25150.0\n", + " 25151.0\n", " amod\n", " \n", " \n", - " 25150\n", + " 25151\n", " [276, 281): 'staff'\n", " staff\n", " NOUN\n", - " 25145.0\n", + " 25146.0\n", " conj\n", " \n", " \n", @@ -957,11 +954,11 @@ ], "text/plain": [ " span lemma upostag head deprel\n", - "25146 [251, 254): 'and' and CCONJ 25150.0 cc\n", - "25147 [255, 256): 'a' a DET 25150.0 det\n", - "25148 [257, 261): 'very' very ADV 25149.0 advmod\n", - "25149 [262, 275): 'knowledgeable' knowledgeable ADJ 25150.0 amod\n", - "25150 [276, 281): 'staff' staff NOUN 25145.0 conj" + "25147 [251, 254): 'and' and CCONJ 25151.0 cc\n", + "25148 [255, 256): 'a' a DET 25151.0 det\n", + "25149 [257, 261): 'very' very ADV 25150.0 advmod\n", + "25150 [262, 275): 'knowledgeable' knowledgeable ADJ 25151.0 amod\n", + "25151 [276, 281): 'staff' staff NOUN 25146.0 conj" ] }, "execution_count": 7, @@ -1156,7 +1153,7 @@ { "data": { "text/html": [ - "\n", + "\n", "\n", " And\n", " CCONJ\n", @@ -1203,65 +1200,65 @@ "\n", "\n", "\n", - " \n", + " \n", " \n", - " cc\n", + " cc\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " obj\n", + " obj\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " aux\n", + " aux\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " nsubj\n", + " nsubj\n", " \n", " \n", "\n", 
"\n", "\n", - " \n", + " \n", " \n", - " case\n", + " case\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " det\n", + " det\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " obl\n", + " obl\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " punct\n", + " punct\n", " \n", " \n", "\n", @@ -1400,7 +1397,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7eef3add20ee4a2daf2fddebf1117ea5", + "model_id": "b5bfda1ee61d424f8a08c4456084d9c4", "version_major": 2, "version_minor": 0 }, @@ -1428,7 +1425,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9e4dcd12fdb6412095389c26ce08aa7d", + "model_id": "3e67e5add4354708b726f4d5bb1de9e8", "version_major": 2, "version_minor": 0 }, @@ -1449,7 +1446,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c3b6b3ada3d24c04aced9f684c878328", + "model_id": "ebdb788b384443348ee614a1e0fdcf72", "version_major": 2, "version_minor": 0 }, @@ -1468,14 +1465,15 @@ " temp = tp.io.bert.make_bert_tokens(document.loc[0,'span'].target_text, tokenizer)\n", " # re-correlate our original spans with their bert-compatible equivalents\n", " spans = tp.TokenSpanArray.align_to_tokens(temp[\"span\"],document[\"span\"])\n", - " \n", - " # now carry over some features from the old spans to the new onesspans_df = spans.as_frame().drop(columns = [\"begin\",\"end\"])\n", - " spans_df = spans.as_frame().drop(columns = ['begin','end','covered_text'])\n", + "\n", + " # now carry over some features from the old spans to the new ones\n", + " #spans_df = spans.as_frame().drop(columns = [\"begin\",\"end\"])\n", + " spans_df = spans.as_frame().drop(columns=['begin','end','covered_text'])\n", " spans_df['postag'] = document['upostag']\n", - " printed = 20\n", - " for i,b_tok,e_tok,pos in spans_df.itertuples():\n", - " temp.loc[b_tok:e_tok-1 , [\"postag\",\"raw_span\",'raw_span_id']] = pos,spans[i],i\n", - " \n", + " # printed = 20\n", + " for i, b_tok, 
e_tok, pos in spans_df.itertuples():\n", + " temp.loc[b_tok:e_tok-1, [\"postag\",\"raw_span\",'raw_span_id']] = pos,spans[i],i\n", + "\n", " # now translate from text tags to postag \n", " temp['postag'].fillna('X',inplace=True) # in our Labels, 'X' is a standin for \"N/A\" so convert N/A's to 'X'\n", " temp[\"postag_id\"] = temp['postag'].apply(lambda t: int(upostag_dict[str(t)]))\n", @@ -1559,7 +1557,7 @@ " NaN\n", " NaN\n", " 14\n", - " [ -0.37686592, -0.14841378, 0.73980016, ...\n", + " [ -0.37686658, -0.14841351, 0.7398003, ...\n", " \n", " \n", "
\n", @@ -1576,7 +1574,7 @@ " [0, 4): 'What'\n", " 0.0\n", " 11\n", - " [ -0.23266968, -0.40546328, 0.6171929, ...\n", + " [ -0.23266977, -0.40546313, 0.61719275, ...\n", " What\n", "
\n", "
\n", @@ -1593,7 +1591,7 @@ " [5, 7): 'if'\n", " 1.0\n", " 13\n", - " [ -0.8156859, -0.04782569, 0.081484295, ...\n", + " [ -0.81568515, -0.047825783, 0.08148496, ...\n", " if\n", "
\n", "
\n", @@ -1610,7 +1608,7 @@ " [8, 14): 'Google'\n", " 2.0\n", " 2\n", - " [ 0.78967804, -0.8511879, -0.48812625, ...\n", + " [ 0.7896778, -0.85118735, -0.48812556, ...\n", " Google\n", "
\n", "
\n", @@ -1627,7 +1625,7 @@ " [15, 22): 'Morphed'\n", " 3.0\n", " 3\n", - " [ -0.25935018, 0.5710723, -0.09106647, ...\n", + " [ -0.25935066, 0.57107216, -0.09106692, ...\n", " Mo\n", "
\n", " \n", @@ -1650,11 +1648,11 @@ "4 1 False VERB [15, 22): 'Morphed' \n", "\n", " raw_span_id postag_id embedding \\\n", - "0 NaN 14 [ -0.37686592, -0.14841378, 0.73980016, ... \n", - "1 0.0 11 [ -0.23266968, -0.40546328, 0.6171929, ... \n", - "2 1.0 13 [ -0.8156859, -0.04782569, 0.081484295, ... \n", - "3 2.0 2 [ 0.78967804, -0.8511879, -0.48812625, ... \n", - "4 3.0 3 [ -0.25935018, 0.5710723, -0.09106647, ... \n", + "0 NaN 14 [ -0.37686658, -0.14841351, 0.7398003, ... \n", + "1 0.0 11 [ -0.23266977, -0.40546313, 0.61719275, ... \n", + "2 1.0 13 [ -0.81568515, -0.047825783, 0.08148496, ... \n", + "3 2.0 2 [ 0.7896778, -0.85118735, -0.48812556, ... \n", + "4 3.0 3 [ -0.25935066, 0.57107216, -0.09106692, ... \n", "\n", " text \n", "0 \n", @@ -1683,12 +1681,303 @@ "execution_count": 13, "id": "63efdd32-5458-4dcb-91f3-dca97a7d1772", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
folddoc_numtoken_idspaninput_idtoken_type_idattention_maskspecial_tokens_maskpostagraw_spanraw_span_idpostag_idembeddingtext
0test00[0, 0): ''10101TrueXNaNNaN14[ -0.37686658, -0.14841351, 0.739800...
1test01[0, 4): 'What'132701FalsePRON[0, 4): 'What'0.011[ -0.23266977, -0.40546313, 0.6171927...What
2test02[5, 7): 'if'119101FalseSCONJ[5, 7): 'if'1.013[ -0.81568515, -0.047825783, 0.0814849...if
3test03[8, 14): 'Google'798601FalsePROPN[8, 14): 'Google'2.02[ 0.7896778, -0.85118735, -0.4881255...Google
4test04[15, 17): 'Mo'1255601FalseVERB[15, 22): 'Morphed'3.03[ -0.25935066, 0.57107216, -0.0910669...Mo
.............................................
307907train539756[3152, 3154): 'my'113901FalsePRON[3152, 3154): 'my'690.011[ -0.06984619, -0.4646066, 0.854770...my
307908train539757[3155, 3158): 'car'161001FalseNOUN[3155, 3158): 'car'691.04[ 0.14624149, -0.46386155, 0.596684...car
307909train539758[3158, 3159): ')'11401FalsePUNCT[3158, 3159): ')'692.05[ -0.09065091, -0.29592815, 0.5970235...)
307910train539759[3159, 3160): '.'11901FalsePUNCT[3159, 3160): '.'693.05[ 0.03102289, -0.27608734, 0.782190....
307911train539760[0, 0): ''10201TrueXNaNNaN14[ -0.50887, -0.22885998, 0.54494...
\n", + "

307912 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " fold doc_num token_id span input_id \\\n", + "0 test 0 0 [0, 0): '' 101 \n", + "1 test 0 1 [0, 4): 'What' 1327 \n", + "2 test 0 2 [5, 7): 'if' 1191 \n", + "3 test 0 3 [8, 14): 'Google' 7986 \n", + "4 test 0 4 [15, 17): 'Mo' 12556 \n", + "... ... ... ... ... ... \n", + "307907 train 539 756 [3152, 3154): 'my' 1139 \n", + "307908 train 539 757 [3155, 3158): 'car' 1610 \n", + "307909 train 539 758 [3158, 3159): ')' 114 \n", + "307910 train 539 759 [3159, 3160): '.' 119 \n", + "307911 train 539 760 [0, 0): '' 102 \n", + "\n", + " token_type_id attention_mask special_tokens_mask postag \\\n", + "0 0 1 True X \n", + "1 0 1 False PRON \n", + "2 0 1 False SCONJ \n", + "3 0 1 False PROPN \n", + "4 0 1 False VERB \n", + "... ... ... ... ... \n", + "307907 0 1 False PRON \n", + "307908 0 1 False NOUN \n", + "307909 0 1 False PUNCT \n", + "307910 0 1 False PUNCT \n", + "307911 0 1 True X \n", + "\n", + " raw_span raw_span_id postag_id \\\n", + "0 NaN NaN 14 \n", + "1 [0, 4): 'What' 0.0 11 \n", + "2 [5, 7): 'if' 1.0 13 \n", + "3 [8, 14): 'Google' 2.0 2 \n", + "4 [15, 22): 'Morphed' 3.0 3 \n", + "... ... ... ... \n", + "307907 [3152, 3154): 'my' 690.0 11 \n", + "307908 [3155, 3158): 'car' 691.0 4 \n", + "307909 [3158, 3159): ')' 692.0 5 \n", + "307910 [3159, 3160): '.' 693.0 5 \n", + "307911 NaN NaN 14 \n", + "\n", + " embedding text \n", + "0 [ -0.37686658, -0.14841351, 0.739800... \n", + "1 [ -0.23266977, -0.40546313, 0.6171927... What \n", + "2 [ -0.81568515, -0.047825783, 0.0814849... if \n", + "3 [ 0.7896778, -0.85118735, -0.4881255... Google \n", + "4 [ -0.25935066, 0.57107216, -0.0910669... Mo \n", + "... ... ... \n", + "307907 [ -0.06984619, -0.4646066, 0.854770... my \n", + "307908 [ 0.14624149, -0.46386155, 0.596684... car \n", + "307909 [ -0.09065091, -0.29592815, 0.5970235... ) \n", + "307910 [ 0.03102289, -0.27608734, 0.782190... . \n", + "307911 [ -0.50887, -0.22885998, 0.54494... 
\n", + "\n", + "[307912 rows x 14 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# re-read feather document if need be: \n", "if corpus_df is None or corpus_df.size == 0:\n", " corpus_df = pd.read_feather(\"outputs/conll_u_corpus.feather\")\n", - " corpus_df" + "corpus_df" ] }, { @@ -1736,7 +2025,7 @@ " \n", " \n", " \n", - " 64729\n", + " 64731\n", " train\n", " 0\n", " 0\n", @@ -1749,11 +2038,11 @@ " NaN\n", " NaN\n", " 14\n", - " [ -0.41927838, -0.22575253, 0.6648760...\n", + " [ -0.41927913, -0.22575217, 0.6648752...\n", " \n", " \n", " \n", - " 64730\n", + " 64732\n", " train\n", " 0\n", " 1\n", @@ -1766,11 +2055,11 @@ " [0, 2): 'Al'\n", " 0.0\n", " 2\n", - " [ -0.36961424, -1.0804733, -0.283367...\n", + " [ -0.36961484, -1.0804743, -0.2833683...\n", " Al\n", " \n", " \n", - " 64731\n", + " 64733\n", " train\n", " 0\n", " 2\n", @@ -1783,11 +2072,11 @@ " [2, 3): '-'\n", " 1.0\n", " 5\n", - " [ -0.9178737, -0.94624436, -0.808995...\n", + " [ -0.9178743, -0.9462442, -0.808995...\n", " -\n", " \n", " \n", - " 64732\n", + " 64734\n", " train\n", " 0\n", " 3\n", @@ -1800,11 +2089,11 @@ " [4, 9): 'Zaman'\n", " 2.0\n", " 2\n", - " [ -0.90530086, -0.97086835, -1.440879...\n", + " [ -0.90530103, -0.97086823, -1.440878...\n", " Z\n", " \n", " \n", - " 64733\n", + " 64735\n", " train\n", " 0\n", " 4\n", @@ -1817,7 +2106,7 @@ " [4, 9): 'Zaman'\n", " 2.0\n", " 2\n", - " [ -1.1586123, -1.149766, -1.194975...\n", + " [ -1.158612, -1.1497651, -1.194976...\n", " aman\n", " \n", " \n", @@ -1838,7 +2127,7 @@ " ...\n", " \n", " \n", - " 307892\n", + " 307907\n", " train\n", " 539\n", " 756\n", @@ -1851,11 +2140,11 @@ " [3152, 3154): 'my'\n", " 690.0\n", " 11\n", - " [ -0.06984596, -0.4646067, 0.8547705...\n", + " [ -0.06984619, -0.4646066, 0.854770...\n", " my\n", " \n", " \n", - " 307893\n", + " 307908\n", " train\n", " 539\n", " 757\n", @@ -1868,11 +2157,11 @@ " [3155, 3158): 'car'\n", " 691.0\n", " 4\n", - " 
[ 0.14624132, -0.46386197, 0.596684...\n", + " [ 0.14624149, -0.46386155, 0.596684...\n", " car\n", " \n", " \n", - " 307894\n", + " 307909\n", " train\n", " 539\n", " 758\n", @@ -1885,11 +2174,11 @@ " [3158, 3159): ')'\n", " 692.0\n", " 5\n", - " [ -0.090651065, -0.29592788, 0.597023...\n", + " [ -0.09065091, -0.29592815, 0.5970235...\n", " )\n", " \n", " \n", - " 307895\n", + " 307910\n", " train\n", " 539\n", " 759\n", @@ -1902,11 +2191,11 @@ " [3159, 3160): '.'\n", " 693.0\n", " 5\n", - " [ 0.031023545, -0.27608734, 0.782190...\n", + " [ 0.03102289, -0.27608734, 0.782190...\n", " .\n", " \n", " \n", - " 307896\n", + " 307911\n", " train\n", " 539\n", " 760\n", @@ -1919,68 +2208,68 @@ " NaN\n", " NaN\n", " 14\n", - " [ -0.5088702, -0.22885968, 0.544944...\n", + " [ -0.50887, -0.22885998, 0.54494...\n", " \n", " \n", " \n", "\n", - "

243168 rows × 14 columns

\n", + "

243181 rows × 14 columns

\n", "" ], "text/plain": [ " fold doc_num token_id span input_id \\\n", - "64729 train 0 0 [0, 0): '' 101 \n", - "64730 train 0 1 [0, 2): 'Al' 2586 \n", - "64731 train 0 2 [2, 3): '-' 118 \n", - "64732 train 0 3 [4, 5): 'Z' 163 \n", - "64733 train 0 4 [5, 9): 'aman' 19853 \n", + "64731 train 0 0 [0, 0): '' 101 \n", + "64732 train 0 1 [0, 2): 'Al' 2586 \n", + "64733 train 0 2 [2, 3): '-' 118 \n", + "64734 train 0 3 [4, 5): 'Z' 163 \n", + "64735 train 0 4 [5, 9): 'aman' 19853 \n", "... ... ... ... ... ... \n", - "307892 train 539 756 [3152, 3154): 'my' 1139 \n", - "307893 train 539 757 [3155, 3158): 'car' 1610 \n", - "307894 train 539 758 [3158, 3159): ')' 114 \n", - "307895 train 539 759 [3159, 3160): '.' 119 \n", - "307896 train 539 760 [0, 0): '' 102 \n", + "307907 train 539 756 [3152, 3154): 'my' 1139 \n", + "307908 train 539 757 [3155, 3158): 'car' 1610 \n", + "307909 train 539 758 [3158, 3159): ')' 114 \n", + "307910 train 539 759 [3159, 3160): '.' 119 \n", + "307911 train 539 760 [0, 0): '' 102 \n", "\n", " token_type_id attention_mask special_tokens_mask postag \\\n", - "64729 0 1 True X \n", - "64730 0 1 False PROPN \n", - "64731 0 1 False PUNCT \n", + "64731 0 1 True X \n", "64732 0 1 False PROPN \n", - "64733 0 1 False PROPN \n", + "64733 0 1 False PUNCT \n", + "64734 0 1 False PROPN \n", + "64735 0 1 False PROPN \n", "... ... ... ... ... 
\n", - "307892 0 1 False PRON \n", - "307893 0 1 False NOUN \n", - "307894 0 1 False PUNCT \n", - "307895 0 1 False PUNCT \n", - "307896 0 1 True X \n", + "307907 0 1 False PRON \n", + "307908 0 1 False NOUN \n", + "307909 0 1 False PUNCT \n", + "307910 0 1 False PUNCT \n", + "307911 0 1 True X \n", "\n", " raw_span raw_span_id postag_id \\\n", - "64729 NaN NaN 14 \n", - "64730 [0, 2): 'Al' 0.0 2 \n", - "64731 [2, 3): '-' 1.0 5 \n", - "64732 [4, 9): 'Zaman' 2.0 2 \n", - "64733 [4, 9): 'Zaman' 2.0 2 \n", + "64731 NaN NaN 14 \n", + "64732 [0, 2): 'Al' 0.0 2 \n", + "64733 [2, 3): '-' 1.0 5 \n", + "64734 [4, 9): 'Zaman' 2.0 2 \n", + "64735 [4, 9): 'Zaman' 2.0 2 \n", "... ... ... ... \n", - "307892 [3152, 3154): 'my' 690.0 11 \n", - "307893 [3155, 3158): 'car' 691.0 4 \n", - "307894 [3158, 3159): ')' 692.0 5 \n", - "307895 [3159, 3160): '.' 693.0 5 \n", - "307896 NaN NaN 14 \n", + "307907 [3152, 3154): 'my' 690.0 11 \n", + "307908 [3155, 3158): 'car' 691.0 4 \n", + "307909 [3158, 3159): ')' 692.0 5 \n", + "307910 [3159, 3160): '.' 693.0 5 \n", + "307911 NaN NaN 14 \n", "\n", " embedding text \n", - "64729 [ -0.41927838, -0.22575253, 0.6648760... \n", - "64730 [ -0.36961424, -1.0804733, -0.283367... Al \n", - "64731 [ -0.9178737, -0.94624436, -0.808995... - \n", - "64732 [ -0.90530086, -0.97086835, -1.440879... Z \n", - "64733 [ -1.1586123, -1.149766, -1.194975... aman \n", + "64731 [ -0.41927913, -0.22575217, 0.6648752... \n", + "64732 [ -0.36961484, -1.0804743, -0.2833683... Al \n", + "64733 [ -0.9178743, -0.9462442, -0.808995... - \n", + "64734 [ -0.90530103, -0.97086823, -1.440878... Z \n", + "64735 [ -1.158612, -1.1497651, -1.194976... aman \n", "... ... ... \n", - "307892 [ -0.06984596, -0.4646067, 0.8547705... my \n", - "307893 [ 0.14624132, -0.46386197, 0.596684... car \n", - "307894 [ -0.090651065, -0.29592788, 0.597023... ) \n", - "307895 [ 0.031023545, -0.27608734, 0.782190... . \n", - "307896 [ -0.5088702, -0.22885968, 0.544944... 
\n", + "307907 [ -0.06984619, -0.4646066, 0.854770... my \n", + "307908 [ 0.14624149, -0.46386155, 0.596684... car \n", + "307909 [ -0.09065091, -0.29592815, 0.5970235... ) \n", + "307910 [ 0.03102289, -0.27608734, 0.782190... . \n", + "307911 [ -0.50887, -0.22885998, 0.54494... \n", "\n", - "[243168 rows x 14 columns]" + "[243181 rows x 14 columns]" ] }, "execution_count": 14, @@ -2010,29 +2299,120 @@ "id": "034a61f3-7fe8-4b02-b649-cea67e14ceb3", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RUNNING THE L-BFGS-B CODE\n", + "\n", + " * * *\n", + "\n", + "Machine precision = 2.220D-16\n", + " N = 13073 M = 10\n", + "\n", + "At X0 0 variables are exactly at the bounds\n", + "\n", + "At iterate 0 f= 6.88984D+05 |proj g|= 6.62729D+04\n" + ] + }, { "name": "stderr", "output_type": "stream", "text": [ - "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", - "/Users/freiss/opt/miniconda3/envs/pd/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + " This problem is unconstrained.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "At iterate 50 f= 2.44541D+05 |proj g|= 3.93970D+03\n", + "\n", + "At iterate 100 f= 1.63368D+05 |proj g|= 1.71818D+03\n", + "\n", + "At iterate 150 f= 1.32218D+05 |proj g|= 1.03361D+03\n", + "\n", + "At iterate 200 f= 1.18130D+05 |proj g|= 7.32021D+02\n", + "\n", + "At iterate 250 f= 1.09684D+05 |proj g|= 1.23366D+03\n", + "\n", + "At iterate 300 f= 1.05398D+05 |proj g|= 6.35734D+02\n", + "\n", + "At iterate 350 f= 1.02851D+05 |proj g|= 2.76671D+02\n", + "\n", + "At iterate 400 f= 1.01228D+05 |proj g|= 5.09281D+02\n", + "\n", + "At iterate 450 f= 1.00038D+05 |proj g|= 3.14557D+02\n", + "\n", + "At iterate 500 f= 9.92494D+04 |proj g|= 1.68499D+02\n", + "\n", + "At iterate 550 f= 9.88417D+04 |proj g|= 4.91916D+02\n", + "\n", + "At iterate 600 f= 9.85123D+04 
|proj g|= 2.02593D+02\n", + "\n", + "At iterate 650 f= 9.82550D+04 |proj g|= 1.28953D+02\n", + "\n", + "At iterate 700 f= 9.81148D+04 |proj g|= 1.09533D+02\n", + "\n", + "At iterate 750 f= 9.80368D+04 |proj g|= 8.88012D+01\n", + "\n", + "At iterate 800 f= 9.79714D+04 |proj g|= 7.48262D+01\n", + "\n", + "At iterate 850 f= 9.79321D+04 |proj g|= 1.00950D+02\n", + "\n", + "At iterate 900 f= 9.79023D+04 |proj g|= 2.59398D+01\n", + "\n", + "At iterate 950 f= 9.78679D+04 |proj g|= 3.74091D+01\n", + "\n", + "At iterate 1000 f= 9.78449D+04 |proj g|= 3.24331D+01\n", + "\n", + " * * *\n", + "\n", + "Tit = total number of iterations\n", + "Tnf = total number of function evaluations\n", + "Tnint = total number of segments explored during Cauchy searches\n", + "Skip = number of BFGS updates skipped\n", + "Nact = number of active bounds at final generalized Cauchy point\n", + "Projg = norm of the final projected gradient\n", + "F = final function value\n", + "\n", + " * * *\n", + "\n", + " N Tit Tnf Tnint Skip Nact Projg F\n", + "13073 1000 1065 1 0 0 3.243D+01 9.784D+04\n", + " F = 97844.884007299028 \n", + "\n", + "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/freiss/opt/miniconda3/envs/pd/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", - " n_iter_i = _check_optimize_result(\n", - "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 16.6min remaining: 0.0s\n", - "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 16.6min finished\n" + " n_iter_i = _check_optimize_result(\n" ] }, { "data": { + "text/html": [ + "
Pipeline(steps=[('mlogreg',\n",
+       "                 LogisticRegression(C=0.1, max_iter=1000,\n",
+       "                                    multi_class='multinomial', verbose=1))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], "text/plain": [ "Pipeline(steps=[('mlogreg',\n", - " LogisticRegression(max_iter=1000, multi_class='multinomial',\n", - " verbose=10))])" + " LogisticRegression(C=0.1, max_iter=1000,\n", + " multi_class='multinomial', verbose=1))])" ] }, "execution_count": 15, @@ -2041,12 +2421,13 @@ } ], "source": [ - "# now actually train a model, using sklearn \n", - "MULTI_CLASS= \"multinomial\"\n", + "# now actually train a model, using sklearn\n", + "MULTI_CLASS = \"multinomial\"\n", "\n", "# How many iterations to run the BGFS optimizer when fitting logistic\n", "# regression models. 100 ==> Fast; 10000 ==> Full convergence\n", "LBGFS_ITERATIONS = 1000\n", + "REGULARIZATION_COEFF = 1e-1\n", "\n", "base_pipeline = sklearn.pipeline.Pipeline([\n", " # Standard scaler. This only makes a difference for certain classes\n", @@ -2054,8 +2435,9 @@ " #(\"scaler\", sklearn.preprocessing.StandardScaler()),\n", " (\"mlogreg\", sklearn.linear_model.LogisticRegression(\n", " multi_class=MULTI_CLASS,\n", - " verbose=10,\n", - " max_iter=LBGFS_ITERATIONS\n", + " verbose=1,\n", + " max_iter=LBGFS_ITERATIONS,\n", + " C=REGULARIZATION_COEFF\n", " ))\n", "])\n", "\n", @@ -2165,11 +2547,11 @@ " NaN\n", " NaN\n", " 14\n", - " [ -0.37686592, -0.14841378, 0.7398001...\n", + " [ -0.37686658, -0.14841351, 0.739800...\n", " \n", " 14\n", " X\n", - " [ 3.681475919382054e-11, 8.766155854203454e-1...\n", + " [1.8453993737147312e-09, 7.2817536336665424e-0...\n", " \n", " \n", " 1\n", @@ -2185,11 +2567,11 @@ " [0, 4): 'What'\n", " 0.0\n", " 11\n", - " [ -0.23266968, -0.40546328, 0.617192...\n", + " [ -0.23266977, -0.40546313, 0.6171927...\n", " What\n", " 5\n", " PUNCT\n", - " [ 4.480117969135689e-05, 0.000492260661933639...\n", + " [0.00018662917618329718, 0.002463643966812593...\n", " \n", " \n", " 2\n", @@ -2205,11 +2587,11 @@ " [5, 7): 'if'\n", " 1.0\n", " 13\n", - " [ -0.8156859, -0.04782569, 0.08148429...\n", + " [ -0.81568515, -0.047825783, 0.0814849...\n", " if\n", " 13\n", " SCONJ\n", - " [ 
0.00458089489431613, 1.0102614181540655e-0...\n", + " [ 0.0041733565145387315, 1.600001587508807e-0...\n", " \n", " \n", " 3\n", @@ -2225,11 +2607,11 @@ " [8, 14): 'Google'\n", " 2.0\n", " 2\n", - " [ 0.78967804, -0.8511879, -0.4881262...\n", + " [ 0.7896778, -0.85118735, -0.4881255...\n", " Google\n", " 2\n", " PROPN\n", - " [2.0128060688355368e-13, 4.3664010704307723e-1...\n", + " [1.9368418908888587e-11, 2.3583553447853555e-0...\n", " \n", " \n", " 4\n", @@ -2245,11 +2627,11 @@ " [15, 22): 'Morphed'\n", " 3.0\n", " 3\n", - " [ -0.25935018, 0.5710723, -0.0910664...\n", + " [ -0.25935066, 0.57107216, -0.0910669...\n", " Mo\n", - " 2\n", - " PROPN\n", - " [ 0.004772281895284574, 3.990804066047649e-0...\n", + " 4\n", + " NOUN\n", + " [ 0.019704268908089885, 4.618509095536987e-0...\n", " \n", " \n", " 5\n", @@ -2265,11 +2647,11 @@ " [15, 22): 'Morphed'\n", " 3.0\n", " 3\n", - " [ -0.3267119, -0.10905984, 0.053087...\n", + " [ -0.32671162, -0.10906017, 0.0530867...\n", " rp\n", - " 2\n", - " PROPN\n", - " [ 4.133346131920443e-14, 3.0715492927999484e-0...\n", + " 4\n", + " NOUN\n", + " [ 7.710227050759564e-11, 5.44760536137293e-0...\n", " \n", " \n", " 6\n", @@ -2285,11 +2667,11 @@ " [15, 22): 'Morphed'\n", " 3.0\n", " 3\n", - " [ -0.9018082, -0.16881262, 0.4379902...\n", + " [ -0.9018081, -0.16881368, 0.4379903...\n", " hed\n", " 3\n", " VERB\n", - " [ 0.0003547861848056146, 2.0943022199837429e-1...\n", + " [0.00028466818218936664, 5.74427599535707e-0...\n", " \n", " \n", " 7\n", @@ -2305,11 +2687,11 @@ " [23, 27): 'Into'\n", " 4.0\n", " 0\n", - " [ 0.09566124, -0.109931074, -0.1493219...\n", + " [ 0.095660955, -0.10993134, -0.149321...\n", " Into\n", " 0\n", " ADP\n", - " [ 0.98593362749934, 2.223312204453196e-1...\n", + " [ 0.937516572524648, 5.276906149141363e-1...\n", " \n", " \n", " 8\n", @@ -2325,11 +2707,11 @@ " [28, 36): 'GoogleOS'\n", " 5.0\n", " 2\n", - " [ -1.2022994, -0.29254374, 0.2236384...\n", + " [ -1.2022991, -0.29254493, 0.2236394...\n", " Google\n", 
" 2\n", " PROPN\n", - " [ 8.802423148364236e-22, 9.098724786449631e-2...\n", + " [ 4.637041881505099e-16, 2.9507503915558095e-1...\n", " \n", " \n", " 9\n", @@ -2345,11 +2727,11 @@ " [28, 36): 'GoogleOS'\n", " 5.0\n", " 2\n", - " [ -0.78180003, -0.20742358, -1.288184...\n", + " [ -0.7818, -0.20742272, -1.288183...\n", " OS\n", " 2\n", " PROPN\n", - " [ 6.859296955972293e-14, 1.3584745823663452e-1...\n", + " [ 6.662825566299148e-09, 7.615507893740757e-1...\n", " \n", " \n", " 10\n", @@ -2365,11 +2747,11 @@ " [36, 37): '?'\n", " 6.0\n", " 5\n", - " [ -0.34068698, -0.4208277, 0.674408...\n", + " [ -0.3406865, -0.42082712, 0.674408...\n", " ?\n", " 5\n", " PUNCT\n", - " [ 4.419303709134792e-06, 2.2879619521678678e-0...\n", + " [2.1774277583327972e-05, 8.000939232471684e-0...\n", " \n", " \n", " 11\n", @@ -2385,11 +2767,11 @@ " [38, 42): 'What'\n", " 7.0\n", " 11\n", - " [ -0.39101043, -0.33632284, 0.6353156...\n", + " [ -0.3910109, -0.3363229, 0.6353158...\n", " What\n", - " 13\n", - " SCONJ\n", - " [ 1.089099756150613e-05, 4.301330361203966e-0...\n", + " 5\n", + " PUNCT\n", + " [ 4.431027855005785e-05, 0.0001618364833338211...\n", " \n", " \n", " 12\n", @@ -2405,11 +2787,11 @@ " [43, 45): 'if'\n", " 8.0\n", " 13\n", - " [ -0.68665487, -0.16331403, 0.2546722...\n", + " [ -0.6866545, -0.16331364, 0.2546724...\n", " if\n", " 13\n", " SCONJ\n", - " [ 0.00056745829301006, 1.0575813370745938e-0...\n", + " [ 0.0002872959609098302, 3.591134341517693e-0...\n", " \n", " \n", " 13\n", @@ -2425,11 +2807,11 @@ " [46, 52): 'Google'\n", " 9.0\n", " 2\n", - " [ 0.57027435, -0.9182296, -0.1871779...\n", + " [ 0.57027406, -0.9182299, -0.1871781...\n", " Google\n", " 2\n", " PROPN\n", - " [ 5.256030568823519e-06, 0.000141057983398473...\n", + " [1.5862060169266657e-06, 0.00870008781376279...\n", " \n", " \n", " 14\n", @@ -2445,11 +2827,11 @@ " [53, 61): 'expanded'\n", " 10.0\n", " 3\n", - " [ -0.48126522, -0.1581611, 0.4039635...\n", + " [ -0.48126468, -0.15816039, 0.4039639...\n", " 
expanded\n", " 3\n", " VERB\n", - " [ 4.050874025777886e-07, 1.9460546573814385e-1...\n", + " [2.2494319522580332e-06, 1.3830784723467198e-0...\n", " \n", " \n", " 15\n", @@ -2465,11 +2847,11 @@ " [62, 64): 'on'\n", " 11.0\n", " 0\n", - " [ -0.17011818, -0.37733135, 0.745948...\n", + " [ -0.17011856, -0.37733135, 0.7459479...\n", " on\n", " 0\n", " ADP\n", - " [ 0.994939087225643, 1.8480376986013427e-0...\n", + " [ 0.9969812735277428, 2.038596982401045e-0...\n", " \n", " \n", " 16\n", @@ -2485,11 +2867,11 @@ " [65, 68): 'its'\n", " 12.0\n", " 11\n", - " [ -0.34582123, -0.3814539, 0.539305...\n", + " [ -0.34582132, -0.38145372, 0.5393058...\n", " its\n", - " 11\n", - " PRON\n", - " [ 0.14933916780906895, 1.9661816857805298e-0...\n", + " 0\n", + " ADP\n", + " [ 0.3528985046235023, 0.0004074385035340905...\n", " \n", " \n", " 17\n", @@ -2505,11 +2887,11 @@ " [69, 75): 'search'\n", " 13.0\n", " 4\n", - " [ -0.1650713, -0.54526025, 0.648461...\n", + " [ -0.16507219, -0.5452602, 0.648461...\n", " search\n", " 4\n", " NOUN\n", - " [ 5.089421239766719e-07, 5.0023756350866345e-0...\n", + " [ 2.736910426420035e-06, 2.578768500234103e-0...\n", " \n", " \n", " 18\n", @@ -2525,11 +2907,11 @@ " [75, 76): '-'\n", " 14.0\n", " 5\n", - " [ -0.16116095, -0.44251364, 0.7121795...\n", + " [ -0.16116115, -0.44251344, 0.712179...\n", " -\n", " 5\n", " PUNCT\n", - " [ 0.0004927920586926724, 6.359739270183003e-0...\n", + " [ 0.005427808445130677, 4.262439649575787e-0...\n", " \n", " \n", " 19\n", @@ -2545,11 +2927,11 @@ " [77, 83): 'engine'\n", " 15.0\n", " 4\n", - " [ -0.35368297, -0.47415957, 0.4551175...\n", + " [ -0.35368297, -0.47415996, 0.4551170...\n", " engine\n", " 4\n", " NOUN\n", - " [ 1.888161183325207e-08, 1.225134189182207e-1...\n", + " [3.6459129481986373e-06, 2.963439538826619e-1...\n", " \n", " \n", "\n", @@ -2601,48 +2983,48 @@ "19 1 False NOUN [77, 83): 'engine' \n", "\n", " raw_span_id postag_id embedding \\\n", - "0 NaN 14 [ -0.37686592, -0.14841378, 0.7398001... 
\n", - "1 0.0 11 [ -0.23266968, -0.40546328, 0.617192... \n", - "2 1.0 13 [ -0.8156859, -0.04782569, 0.08148429... \n", - "3 2.0 2 [ 0.78967804, -0.8511879, -0.4881262... \n", - "4 3.0 3 [ -0.25935018, 0.5710723, -0.0910664... \n", - "5 3.0 3 [ -0.3267119, -0.10905984, 0.053087... \n", - "6 3.0 3 [ -0.9018082, -0.16881262, 0.4379902... \n", - "7 4.0 0 [ 0.09566124, -0.109931074, -0.1493219... \n", - "8 5.0 2 [ -1.2022994, -0.29254374, 0.2236384... \n", - "9 5.0 2 [ -0.78180003, -0.20742358, -1.288184... \n", - "10 6.0 5 [ -0.34068698, -0.4208277, 0.674408... \n", - "11 7.0 11 [ -0.39101043, -0.33632284, 0.6353156... \n", - "12 8.0 13 [ -0.68665487, -0.16331403, 0.2546722... \n", - "13 9.0 2 [ 0.57027435, -0.9182296, -0.1871779... \n", - "14 10.0 3 [ -0.48126522, -0.1581611, 0.4039635... \n", - "15 11.0 0 [ -0.17011818, -0.37733135, 0.745948... \n", - "16 12.0 11 [ -0.34582123, -0.3814539, 0.539305... \n", - "17 13.0 4 [ -0.1650713, -0.54526025, 0.648461... \n", - "18 14.0 5 [ -0.16116095, -0.44251364, 0.7121795... \n", - "19 15.0 4 [ -0.35368297, -0.47415957, 0.4551175... \n", + "0 NaN 14 [ -0.37686658, -0.14841351, 0.739800... \n", + "1 0.0 11 [ -0.23266977, -0.40546313, 0.6171927... \n", + "2 1.0 13 [ -0.81568515, -0.047825783, 0.0814849... \n", + "3 2.0 2 [ 0.7896778, -0.85118735, -0.4881255... \n", + "4 3.0 3 [ -0.25935066, 0.57107216, -0.0910669... \n", + "5 3.0 3 [ -0.32671162, -0.10906017, 0.0530867... \n", + "6 3.0 3 [ -0.9018081, -0.16881368, 0.4379903... \n", + "7 4.0 0 [ 0.095660955, -0.10993134, -0.149321... \n", + "8 5.0 2 [ -1.2022991, -0.29254493, 0.2236394... \n", + "9 5.0 2 [ -0.7818, -0.20742272, -1.288183... \n", + "10 6.0 5 [ -0.3406865, -0.42082712, 0.674408... \n", + "11 7.0 11 [ -0.3910109, -0.3363229, 0.6353158... \n", + "12 8.0 13 [ -0.6866545, -0.16331364, 0.2546724... \n", + "13 9.0 2 [ 0.57027406, -0.9182299, -0.1871781... \n", + "14 10.0 3 [ -0.48126468, -0.15816039, 0.4039639... 
\n", + "15 11.0 0 [ -0.17011856, -0.37733135, 0.7459479... \n", + "16 12.0 11 [ -0.34582132, -0.38145372, 0.5393058... \n", + "17 13.0 4 [ -0.16507219, -0.5452602, 0.648461... \n", + "18 14.0 5 [ -0.16116115, -0.44251344, 0.712179... \n", + "19 15.0 4 [ -0.35368297, -0.47415996, 0.4551170... \n", "\n", " text p_id p_postag raw_output \n", - "0 14 X [ 3.681475919382054e-11, 8.766155854203454e-1... \n", - "1 What 5 PUNCT [ 4.480117969135689e-05, 0.000492260661933639... \n", - "2 if 13 SCONJ [ 0.00458089489431613, 1.0102614181540655e-0... \n", - "3 Google 2 PROPN [2.0128060688355368e-13, 4.3664010704307723e-1... \n", - "4 Mo 2 PROPN [ 0.004772281895284574, 3.990804066047649e-0... \n", - "5 rp 2 PROPN [ 4.133346131920443e-14, 3.0715492927999484e-0... \n", - "6 hed 3 VERB [ 0.0003547861848056146, 2.0943022199837429e-1... \n", - "7 Into 0 ADP [ 0.98593362749934, 2.223312204453196e-1... \n", - "8 Google 2 PROPN [ 8.802423148364236e-22, 9.098724786449631e-2... \n", - "9 OS 2 PROPN [ 6.859296955972293e-14, 1.3584745823663452e-1... \n", - "10 ? 5 PUNCT [ 4.419303709134792e-06, 2.2879619521678678e-0... \n", - "11 What 13 SCONJ [ 1.089099756150613e-05, 4.301330361203966e-0... \n", - "12 if 13 SCONJ [ 0.00056745829301006, 1.0575813370745938e-0... \n", - "13 Google 2 PROPN [ 5.256030568823519e-06, 0.000141057983398473... \n", - "14 expanded 3 VERB [ 4.050874025777886e-07, 1.9460546573814385e-1... \n", - "15 on 0 ADP [ 0.994939087225643, 1.8480376986013427e-0... \n", - "16 its 11 PRON [ 0.14933916780906895, 1.9661816857805298e-0... \n", - "17 search 4 NOUN [ 5.089421239766719e-07, 5.0023756350866345e-0... \n", - "18 - 5 PUNCT [ 0.0004927920586926724, 6.359739270183003e-0... \n", - "19 engine 4 NOUN [ 1.888161183325207e-08, 1.225134189182207e-1... " + "0 14 X [1.8453993737147312e-09, 7.2817536336665424e-0... \n", + "1 What 5 PUNCT [0.00018662917618329718, 0.002463643966812593... \n", + "2 if 13 SCONJ [ 0.0041733565145387315, 1.600001587508807e-0... 
\n", + "3 Google 2 PROPN [1.9368418908888587e-11, 2.3583553447853555e-0... \n", + "4 Mo 4 NOUN [ 0.019704268908089885, 4.618509095536987e-0... \n", + "5 rp 4 NOUN [ 7.710227050759564e-11, 5.44760536137293e-0... \n", + "6 hed 3 VERB [0.00028466818218936664, 5.74427599535707e-0... \n", + "7 Into 0 ADP [ 0.937516572524648, 5.276906149141363e-1... \n", + "8 Google 2 PROPN [ 4.637041881505099e-16, 2.9507503915558095e-1... \n", + "9 OS 2 PROPN [ 6.662825566299148e-09, 7.615507893740757e-1... \n", + "10 ? 5 PUNCT [2.1774277583327972e-05, 8.000939232471684e-0... \n", + "11 What 5 PUNCT [ 4.431027855005785e-05, 0.0001618364833338211... \n", + "12 if 13 SCONJ [ 0.0002872959609098302, 3.591134341517693e-0... \n", + "13 Google 2 PROPN [1.5862060169266657e-06, 0.00870008781376279... \n", + "14 expanded 3 VERB [2.2494319522580332e-06, 1.3830784723467198e-0... \n", + "15 on 0 ADP [ 0.9969812735277428, 2.038596982401045e-0... \n", + "16 its 0 ADP [ 0.3528985046235023, 0.0004074385035340905... \n", + "17 search 4 NOUN [ 2.736910426420035e-06, 2.578768500234103e-0... \n", + "18 - 5 PUNCT [ 0.005427808445130677, 4.262439649575787e-0... \n", + "19 engine 4 NOUN [3.6459129481986373e-06, 2.963439538826619e-1... 
" ] }, "execution_count": 17, @@ -2653,13 +3035,14 @@ "source": [ "def infer_on_df(df: pd.DataFrame, id_to_class_dict, predictor):\n", " result_df = df.copy()\n", - " raw_outputs = tp.TensorArray(predictor.predict_proba(result_df[\"embedding\"]))\n", + " inputs = result_df[\"embedding\"].to_numpy()\n", + " raw_outputs = tp.TensorArray(predictor.predict_proba(inputs))\n", " result_df[\"p_id\"] = np.argmax(raw_outputs, axis=1)\n", " result_df[\"p_postag\"]= result_df[\"p_id\"].apply(lambda p_id: id_to_class_dict[p_id])\n", " result_df[\"raw_output\"] = raw_outputs\n", " return result_df\n", "\n", - "test_results = infer_on_df(corpus_df[corpus_df[\"fold\"] == \"test\"],upostags_list,base_model)\n", + "test_results = infer_on_df(corpus_df[corpus_df[\"fold\"] == \"test\"], upostags_list, base_model)\n", "test_results.head(20)" ] }, @@ -2755,8 +3138,8 @@ " 0\n", " VERB\n", " 3\n", - " 2\n", - " PROPN\n", + " 4\n", + " NOUN\n", " \n", " \n", " [23, 27): 'Into'\n", @@ -2832,7 +3215,7 @@ "[0, 4): 'What' test 0 PRON 11 5 \n", "[5, 7): 'if' test 0 SCONJ 13 13 \n", "[8, 14): 'Google' test 0 PROPN 2 2 \n", - "[15, 22): 'Morphed' test 0 VERB 3 2 \n", + "[15, 22): 'Morphed' test 0 VERB 3 4 \n", "[23, 27): 'Into' test 0 ADP 0 0 \n", "... ... ... ... ... ... \n", "[307, 309): 'of' test 2 ADP 0 0 \n", @@ -2846,7 +3229,7 @@ "[0, 4): 'What' PUNCT \n", "[5, 7): 'if' SCONJ \n", "[8, 14): 'Google' PROPN \n", - "[15, 22): 'Morphed' PROPN \n", + "[15, 22): 'Morphed' NOUN \n", "[23, 27): 'Into' ADP \n", "... ... 
\n", "[307, 309): 'of' ADP \n", @@ -2884,27 +3267,27 @@ "text": [ " precision recall f1-score support\n", "\n", - " ADJ 0.793 0.783 0.788 1782\n", - " ADP 0.916 0.919 0.917 2030\n", - " ADV 0.788 0.749 0.768 1147\n", - " AUX 0.939 0.952 0.946 1509\n", - " CCONJ 0.971 0.961 0.966 738\n", - " DET 0.959 0.958 0.958 1898\n", - " INTJ 0.876 0.708 0.783 120\n", - " NOUN 0.864 0.893 0.878 4136\n", - " NUM 0.839 0.893 0.865 541\n", - " PART 0.943 0.938 0.940 630\n", - " PRON 0.967 0.965 0.966 2158\n", - " PROPN 0.844 0.831 0.837 1985\n", - " PUNCT 0.987 0.963 0.975 3098\n", - " SCONJ 0.852 0.828 0.840 443\n", - " SYM 0.646 0.604 0.624 106\n", - " VERB 0.906 0.901 0.904 2640\n", - " X 0.482 0.686 0.566 137\n", + " ADJ 0.796 0.775 0.785 1784\n", + " ADP 0.911 0.923 0.917 2033\n", + " ADV 0.784 0.748 0.765 1181\n", + " AUX 0.945 0.961 0.953 1525\n", + " CCONJ 0.975 0.966 0.971 737\n", + " DET 0.959 0.959 0.959 1898\n", + " INTJ 0.850 0.708 0.773 120\n", + " NOUN 0.863 0.891 0.877 4137\n", + " NUM 0.809 0.906 0.854 541\n", + " PART 0.947 0.940 0.944 649\n", + " PRON 0.963 0.967 0.965 2162\n", + " PROPN 0.846 0.834 0.840 1981\n", + " PUNCT 0.984 0.964 0.974 3098\n", + " SCONJ 0.857 0.781 0.817 384\n", + " SYM 0.639 0.495 0.558 107\n", + " VERB 0.911 0.900 0.905 2624\n", + " X 0.503 0.689 0.581 135\n", "\n", - " accuracy 0.899 25098\n", - " macro avg 0.857 0.855 0.854 25098\n", - "weighted avg 0.900 0.899 0.899 25098\n", + " accuracy 0.898 25096\n", + " macro avg 0.855 0.847 0.849 25096\n", + "weighted avg 0.899 0.898 0.898 25096\n", "\n" ] } @@ -2940,7 +3323,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.8.17" } }, "nbformat": 4, diff --git a/notebooks/Sentiment_Analysis.ipynb b/notebooks/Sentiment_Analysis.ipynb index 60d1bab4..bd98e934 100644 --- a/notebooks/Sentiment_Analysis.ipynb +++ b/notebooks/Sentiment_Analysis.ipynb @@ -458,7 +458,7 @@ " 5632\n", " 5807\n", " 494\n", - " 5682\n", + " 5681\n", " 
6512\n", " 32\n", " \n", @@ -494,7 +494,7 @@ " 6833\n", " 7106\n", " 829\n", - " 7203\n", + " 7202\n", " 7984\n", " 33\n", " \n", @@ -539,7 +539,7 @@ " 15781\n", " 16254\n", " 2760\n", - " 16501\n", + " 16500\n", " 19334\n", " 33\n", " \n", @@ -557,7 +557,7 @@ " 11611\n", " 10646\n", " 1704\n", - " 11142\n", + " 11141\n", " 12559\n", " 33\n", " \n", @@ -566,7 +566,7 @@ " 16145\n", " 15483\n", " 2328\n", - " 15914\n", + " 15913\n", " 18553\n", " 33\n", " \n", @@ -584,7 +584,7 @@ " 4958\n", " 4960\n", " 495\n", - " 4997\n", + " 4996\n", " 5529\n", " 33\n", " \n", @@ -593,7 +593,7 @@ " 6781\n", " 7373\n", " 1173\n", - " 7463\n", + " 7462\n", " 8460\n", " 33\n", " \n", @@ -620,7 +620,7 @@ " 16908\n", " 17136\n", " 3261\n", - " 17719\n", + " 17718\n", " 20576\n", " 33\n", " \n", @@ -782,7 +782,7 @@ " 6063\n", " 6542\n", " 804\n", - " 6639\n", + " 6638\n", " 7308\n", " 33\n", " \n", @@ -791,7 +791,7 @@ " 3002\n", " 3002\n", " 291\n", - " 3038\n", + " 3037\n", " 3355\n", " 33\n", " \n", @@ -818,7 +818,7 @@ " 10729\n", " 10025\n", " 1735\n", - " 10402\n", + " 10401\n", " 11760\n", " 33\n", " \n", @@ -902,25 +902,25 @@ " Review_Date Author_Name Vehicle_Title Review_Title Review \\\n", "Car_Make \n", "AMGeneral 5 5 2 5 5 \n", - "Acura 5632 5807 494 5682 6512 \n", + "Acura 5632 5807 494 5681 6512 \n", "AlfaRomeo 77 76 22 77 77 \n", "AstonMartin 82 89 31 89 89 \n", "Audi 5069 5389 753 5467 6006 \n", - "BMW 6833 7106 829 7203 7984 \n", + "BMW 6833 7106 829 7202 7984 \n", "Bentley 150 146 39 141 150 \n", "Bugatti 9 9 4 9 9 \n", "Buick 3406 3242 374 3334 3615 \n", "Cadillac 3539 3531 457 3593 3902 \n", - "Chevrolet 15781 16254 2760 16501 19334 \n", + "Chevrolet 15781 16254 2760 16500 19334 \n", "GMC 4327 4425 1261 4415 4964 \n", - "Honda 11611 10646 1704 11142 12559 \n", - "Toyota 16145 15483 2328 15914 18553 \n", + "Honda 11611 10646 1704 11141 12559 \n", + "Toyota 16145 15483 2328 15913 18553 \n", "Volkswagen 8260 8219 1577 8481 9334 \n", - "chrysler 4958 4960 495 4997 5529 
\n", - "dodge 6781 7373 1173 7463 8460 \n", + "chrysler 4958 4960 495 4996 5529 \n", + "dodge 6781 7373 1173 7462 8460 \n", "ferrari 156 159 47 156 161 \n", "fiat 394 380 68 391 391 \n", - "ford 16908 17136 3261 17719 20576 \n", + "ford 16908 17136 3261 17718 20576 \n", "genesis 78 75 16 78 77 \n", "hummer 537 541 35 531 559 \n", "hyundai 7679 7032 943 7250 8156 \n", @@ -938,11 +938,11 @@ "maybach 24 24 6 24 24 \n", "mazda 7165 6830 938 7036 7820 \n", "mclaren 1 1 1 1 1 \n", - "mercedes-benz 6063 6542 804 6639 7308 \n", - "mercury 3002 3002 291 3038 3355 \n", + "mercedes-benz 6063 6542 804 6638 7308 \n", + "mercury 3002 3002 291 3037 3355 \n", "mini 1033 977 127 997 1036 \n", "mitsubishi 3982 4382 601 4222 4773 \n", - "nissan 10729 10025 1735 10402 11760 \n", + "nissan 10729 10025 1735 10401 11760 \n", "pontiac 5066 5294 345 5239 5927 \n", "porsche 1636 1646 280 1657 1774 \n", "ram 564 505 281 551 553 \n", @@ -1059,11 +1059,11 @@ { "data": { "text/plain": [ - "Review_Date 7239\n", - "Author_Name 7410\n", - "Vehicle_Title 5198\n", - "Review_Title 7648\n", - "Review 8341\n", + "Review_Date 7321\n", + "Author_Name 7434\n", + "Vehicle_Title 5292\n", + "Review_Title 7665\n", + "Review 8338\n", "Rating\\r 33\n", "Car_Make 50\n", "dtype: int64" @@ -1153,17 +1153,6 @@ " \n", " \n", " 0\n", - " on 08/30/02 00:00 AM (PDT)\n", - " bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " this beast can go through just about \\ranythi...\n", - " 4.625\n", - " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", - " \n", - " \n", - " 1\n", " on 06/15/02 00:00 AM (PDT)\n", " mike6382\n", " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", @@ -1174,7 +1163,7 @@ " What a waste: I have owned this car for a year...\n", " \n", " \n", - " 2\n", + " 1\n", " on 12/18/05 19:55 PM (PST)\n", " Clayton\n", " 2000 AM General Hummer SUV 4dr SUV AWD\n", @@ -1185,6 +1174,17 @@ " HUMMER NOT A bummer : Vehicle is a beast. 
I do...\n", " \n", " \n", + " 2\n", + " on 01/19/06 19:46 PM (PST)\n", + " REUBEN\n", + " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", + " AWESOME HUMMER\n", + " Hummer is unstoppable. May only get 12 mpg bu...\n", + " 5.000\n", + " AMGeneral\n", + " AWESOME HUMMER: Hummer is unstoppable. May onl...\n", + " \n", + " \n", " 3\n", " on 08/23/03 00:00 AM (PDT)\n", " Bobby Keene\n", @@ -1197,14 +1197,14 @@ " \n", " \n", " 4\n", - " on 01/19/06 19:46 PM (PST)\n", - " REUBEN\n", - " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", - " AWESOME HUMMER\n", - " Hummer is unstoppable. May only get 12 mpg bu...\n", - " 5.000\n", + " on 08/30/02 00:00 AM (PDT)\n", + " bluice3309\n", + " 2000 AM General Hummer SUV 4dr SUV AWD\n", + " a true ride\n", + " this beast can go through just about \\ranythi...\n", + " 4.625\n", " AMGeneral\n", - " AWESOME HUMMER: Hummer is unstoppable. May onl...\n", + " a true ride: this beast can go through just ab...\n", " \n", " \n", "\n", @@ -1212,32 +1212,32 @@ ], "text/plain": [ " Review_Date Author_Name \\\n", - "0 on 08/30/02 00:00 AM (PDT) bluice3309 \n", - "1 on 06/15/02 00:00 AM (PDT) mike6382 \n", - "2 on 12/18/05 19:55 PM (PST) Clayton \n", + "0 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "1 on 12/18/05 19:55 PM (PST) Clayton \n", + "2 on 01/19/06 19:46 PM (PST) REUBEN \n", "3 on 08/23/03 00:00 AM (PDT) Bobby Keene \n", - "4 on 01/19/06 19:46 PM (PST) REUBEN \n", + "4 on 08/30/02 00:00 AM (PDT) bluice3309 \n", "\n", " Vehicle_Title Review_Title \\\n", - "0 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", - "1 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", - "2 2000 AM General Hummer SUV 4dr SUV AWD HUMMER NOT A bummer \n", + "0 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + "1 2000 AM General Hummer SUV 4dr SUV AWD HUMMER NOT A bummer \n", + "2 2000 AM General Hummer SUV Hard Top 4dr SUV AWD AWESOME HUMMER \n", "3 2000 AM General Hummer SUV Hard Top 4dr SUV AWD H1 Review \n", - "4 
2000 AM General Hummer SUV Hard Top 4dr SUV AWD AWESOME HUMMER \n", + "4 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", "\n", " Review Rating\\r Car_Make \\\n", - "0 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", - "1 I have owned this car for a year and a \\rhalf... 1.000 AMGeneral \n", - "2 Vehicle is a beast. I don't recommend HUMMER ... 5.000 AMGeneral \n", + "0 I have owned this car for a year and a \\rhalf... 1.000 AMGeneral \n", + "1 Vehicle is a beast. I don't recommend HUMMER ... 5.000 AMGeneral \n", + "2 Hummer is unstoppable. May only get 12 mpg bu... 5.000 AMGeneral \n", "3 The truck is incredible. I have a long histo... 4.500 AMGeneral \n", - "4 Hummer is unstoppable. May only get 12 mpg bu... 5.000 AMGeneral \n", + "4 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", "\n", " Review_Content \n", - "0 a true ride: this beast can go through just ab... \n", - "1 What a waste: I have owned this car for a year... \n", - "2 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "0 What a waste: I have owned this car for a year... \n", + "1 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "2 AWESOME HUMMER: Hummer is unstoppable. May onl... \n", "3 H1 Review: The truck is incredible. I have a ... \n", - "4 AWESOME HUMMER: Hummer is unstoppable. May onl... " + "4 a true ride: this beast can go through just ab... " ] }, "execution_count": 9, @@ -1265,7 +1265,7 @@ { "data": { "text/plain": [ - "'a true ride: this beast can go through just about \\ranything you through at it. water, \\rfire, brick wall, glass ,ice , you name \\rit. i like my toys to be tough enough \\rto handle what i through at them. and \\rthis toy has NOT let me down and i do \\rnot think it ever will!!!'" + "'What a waste: I have owned this car for a year and a \\rhalf now and it is not reliabile at \\rall. I have driven it through \\reverything and it stalls on me all the \\rtime. I would never buy this car \\ragain. 
and trying to sell it is like \\rtrying to sell fire in hell, just wont \\rhappen.'" ] }, "execution_count": 10, @@ -1355,95 +1355,63 @@ { "data": { "text/plain": [ - "{'usage': {'text_units': 1, 'text_characters': 272, 'features': 1},\n", + "{'usage': {'text_units': 1, 'text_characters': 284, 'features': 1},\n", " 'language': 'en',\n", - " 'keywords': [{'text': 'brick wall',\n", - " 'sentiment': {'score': 0, 'label': 'neutral'},\n", - " 'relevance': 0.929032,\n", - " 'emotion': {'sadness': 0.29279,\n", - " 'joy': 0.230829,\n", - " 'fear': 0.175379,\n", - " 'disgust': 0.180316,\n", - " 'anger': 0.225414},\n", - " 'count': 1},\n", - " {'text': 'true ride',\n", - " 'sentiment': {'score': 0.863267, 'label': 'positive'},\n", - " 'relevance': 0.900583,\n", - " 'emotion': {'sadness': 0.219645,\n", - " 'joy': 0.593742,\n", - " 'fear': 0.087546,\n", - " 'disgust': 0.037523,\n", - " 'anger': 0.08835},\n", + " 'keywords': [{'text': 'waste',\n", + " 'sentiment': {'score': -0.875215, 'label': 'negative'},\n", + " 'relevance': 0.685741,\n", + " 'emotion': {'sadness': 0.192383,\n", + " 'joy': 0.024961,\n", + " 'fear': 0.313145,\n", + " 'disgust': 0.08332,\n", + " 'anger': 0.277825},\n", " 'count': 1},\n", " {'text': 'fire',\n", - " 'sentiment': {'score': 0, 'label': 'neutral'},\n", - " 'relevance': 0.678235,\n", - " 'emotion': {'sadness': 0.29279,\n", - " 'joy': 0.230829,\n", - " 'fear': 0.175379,\n", - " 'disgust': 0.180316,\n", - " 'anger': 0.225414},\n", - " 'count': 1},\n", - " {'text': 'toys',\n", - " 'sentiment': {'score': -0.738776, 'label': 'negative'},\n", - " 'relevance': 0.632428,\n", - " 'emotion': {'sadness': 0.308881,\n", - " 'joy': 0.49626,\n", - " 'fear': 0.157411,\n", - " 'disgust': 0.011575,\n", - " 'anger': 0.090552},\n", + " 'sentiment': {'score': -0.934513, 'label': 'negative'},\n", + " 'relevance': 0.598326,\n", + " 'emotion': {'sadness': 0.360925,\n", + " 'joy': 0.002355,\n", + " 'fear': 0.26649,\n", + " 'disgust': 0.069938,\n", + " 'anger': 0.442759},\n", 
" 'count': 1},\n", - " {'text': 'toy',\n", - " 'sentiment': {'score': -0.941767, 'label': 'negative'},\n", - " 'relevance': 0.567418,\n", - " 'emotion': {'sadness': 0.31777,\n", - " 'joy': 0.483067,\n", - " 'fear': 0.163651,\n", - " 'disgust': 0.011704,\n", - " 'anger': 0.094782},\n", + " {'text': 'car',\n", + " 'sentiment': {'score': -0.844774, 'label': 'negative'},\n", + " 'relevance': 0.581432,\n", + " 'emotion': {'sadness': 0.144346,\n", + " 'joy': 0.150177,\n", + " 'fear': 0.246102,\n", + " 'disgust': 0.06176,\n", + " 'anger': 0.203999},\n", + " 'count': 2},\n", + " {'text': 'hell',\n", + " 'sentiment': {'score': -0.934513, 'label': 'negative'},\n", + " 'relevance': 0.577011,\n", + " 'emotion': {'sadness': 0.360925,\n", + " 'joy': 0.002355,\n", + " 'fear': 0.26649,\n", + " 'disgust': 0.069938,\n", + " 'anger': 0.442759},\n", " 'count': 1},\n", - " {'text': 'glass',\n", - " 'sentiment': {'score': 0, 'label': 'neutral'},\n", - " 'relevance': 0.557307,\n", - " 'emotion': {'sadness': 0.29279,\n", - " 'joy': 0.230829,\n", - " 'fear': 0.175379,\n", - " 'disgust': 0.180316,\n", - " 'anger': 0.225414},\n", - " 'count': 1},\n", - " {'text': 'beast',\n", - " 'sentiment': {'score': 0.863267, 'label': 'positive'},\n", - " 'relevance': 0.550077,\n", - " 'emotion': {'sadness': 0.219645,\n", - " 'joy': 0.593742,\n", - " 'fear': 0.087546,\n", - " 'disgust': 0.037523,\n", - " 'anger': 0.08835},\n", - " 'count': 1},\n", - " {'text': 'ice',\n", - " 'sentiment': {'score': 0, 'label': 'neutral'},\n", - " 'relevance': 0.54733,\n", - " 'emotion': {'sadness': 0.29279,\n", - " 'joy': 0.230829,\n", - " 'fear': 0.175379,\n", - " 'disgust': 0.180316,\n", - " 'anger': 0.225414},\n", + " {'text': 'year',\n", + " 'sentiment': {'score': -0.875215, 'label': 'negative'},\n", + " 'relevance': 0.563676,\n", + " 'emotion': {'sadness': 0.192383,\n", + " 'joy': 0.024961,\n", + " 'fear': 0.313145,\n", + " 'disgust': 0.08332,\n", + " 'anger': 0.277825},\n", " 'count': 1},\n", - " {'text': 
'water',\n", + " {'text': 'time',\n", " 'sentiment': {'score': 0, 'label': 'neutral'},\n", - " 'relevance': 0.539073,\n", - " 'emotion': {'sadness': 0, 'joy': 0, 'fear': 0, 'disgust': 0, 'anger': 0},\n", - " 'count': 1},\n", - " {'text': 'name',\n", - " 'sentiment': {'score': -0.738776, 'label': 'negative'},\n", - " 'relevance': 0.539073,\n", - " 'emotion': {'sadness': 0.29279,\n", - " 'joy': 0.230829,\n", - " 'fear': 0.175379,\n", - " 'disgust': 0.180316,\n", - " 'anger': 0.225414},\n", + " 'relevance': 0.466983,\n", + " 'emotion': {'sadness': 0.266573,\n", + " 'joy': 0.401314,\n", + " 'fear': 0.08908,\n", + " 'disgust': 0.024027,\n", + " 'anger': 0.065767},\n", " 'count': 1}],\n", - " 'analyzed_text': 'a true ride: this beast can go through just about \\ranything you through at it. water, \\rfire, brick wall, glass ,ice , you name \\rit. i like my toys to be tough enough \\rto handle what i through at them. and \\rthis toy has NOT let me down and i do \\rnot think it ever will!!!'}" + " 'analyzed_text': 'What a waste: I have owned this car for a year and a \\rhalf now and it is not reliabile at \\rall. I have driven it through \\reverything and it stalls on me all the \\rtime. I would never buy this car \\ragain. and trying to sell it is like \\rtrying to sell fire in hell, just wont \\rhappen.'}" ] }, "execution_count": 13, @@ -1470,7 +1438,7 @@ { "data": { "text/plain": [ - "'a true ride: this beast can go through just about \\ranything you through at it. water, \\rfire, brick wall, glass ,ice , you name \\rit. i like my toys to be tough enough \\rto handle what i through at them. and \\rthis toy has NOT let me down and i do \\rnot think it ever will!!!'" + "'What a waste: I have owned this car for a year and a \\rhalf now and it is not reliabile at \\rall. I have driven it through \\reverything and it stalls on me all the \\rtime. I would never buy this car \\ragain. 
and trying to sell it is like \\rtrying to sell fire in hell, just wont \\rhappen.'" ] }, "execution_count": 14, @@ -1490,91 +1458,59 @@ { "data": { "text/plain": [ - "[{'text': 'brick wall',\n", - " 'sentiment': {'score': 0, 'label': 'neutral'},\n", - " 'relevance': 0.929032,\n", - " 'emotion': {'sadness': 0.29279,\n", - " 'joy': 0.230829,\n", - " 'fear': 0.175379,\n", - " 'disgust': 0.180316,\n", - " 'anger': 0.225414},\n", - " 'count': 1},\n", - " {'text': 'true ride',\n", - " 'sentiment': {'score': 0.863267, 'label': 'positive'},\n", - " 'relevance': 0.900583,\n", - " 'emotion': {'sadness': 0.219645,\n", - " 'joy': 0.593742,\n", - " 'fear': 0.087546,\n", - " 'disgust': 0.037523,\n", - " 'anger': 0.08835},\n", + "[{'text': 'waste',\n", + " 'sentiment': {'score': -0.875215, 'label': 'negative'},\n", + " 'relevance': 0.685741,\n", + " 'emotion': {'sadness': 0.192383,\n", + " 'joy': 0.024961,\n", + " 'fear': 0.313145,\n", + " 'disgust': 0.08332,\n", + " 'anger': 0.277825},\n", " 'count': 1},\n", " {'text': 'fire',\n", - " 'sentiment': {'score': 0, 'label': 'neutral'},\n", - " 'relevance': 0.678235,\n", - " 'emotion': {'sadness': 0.29279,\n", - " 'joy': 0.230829,\n", - " 'fear': 0.175379,\n", - " 'disgust': 0.180316,\n", - " 'anger': 0.225414},\n", - " 'count': 1},\n", - " {'text': 'toys',\n", - " 'sentiment': {'score': -0.738776, 'label': 'negative'},\n", - " 'relevance': 0.632428,\n", - " 'emotion': {'sadness': 0.308881,\n", - " 'joy': 0.49626,\n", - " 'fear': 0.157411,\n", - " 'disgust': 0.011575,\n", - " 'anger': 0.090552},\n", - " 'count': 1},\n", - " {'text': 'toy',\n", - " 'sentiment': {'score': -0.941767, 'label': 'negative'},\n", - " 'relevance': 0.567418,\n", - " 'emotion': {'sadness': 0.31777,\n", - " 'joy': 0.483067,\n", - " 'fear': 0.163651,\n", - " 'disgust': 0.011704,\n", - " 'anger': 0.094782},\n", - " 'count': 1},\n", - " {'text': 'glass',\n", - " 'sentiment': {'score': 0, 'label': 'neutral'},\n", - " 'relevance': 0.557307,\n", - " 'emotion': 
{'sadness': 0.29279,\n", - " 'joy': 0.230829,\n", - " 'fear': 0.175379,\n", - " 'disgust': 0.180316,\n", - " 'anger': 0.225414},\n", + " 'sentiment': {'score': -0.934513, 'label': 'negative'},\n", + " 'relevance': 0.598326,\n", + " 'emotion': {'sadness': 0.360925,\n", + " 'joy': 0.002355,\n", + " 'fear': 0.26649,\n", + " 'disgust': 0.069938,\n", + " 'anger': 0.442759},\n", " 'count': 1},\n", - " {'text': 'beast',\n", - " 'sentiment': {'score': 0.863267, 'label': 'positive'},\n", - " 'relevance': 0.550077,\n", - " 'emotion': {'sadness': 0.219645,\n", - " 'joy': 0.593742,\n", - " 'fear': 0.087546,\n", - " 'disgust': 0.037523,\n", - " 'anger': 0.08835},\n", + " {'text': 'car',\n", + " 'sentiment': {'score': -0.844774, 'label': 'negative'},\n", + " 'relevance': 0.581432,\n", + " 'emotion': {'sadness': 0.144346,\n", + " 'joy': 0.150177,\n", + " 'fear': 0.246102,\n", + " 'disgust': 0.06176,\n", + " 'anger': 0.203999},\n", + " 'count': 2},\n", + " {'text': 'hell',\n", + " 'sentiment': {'score': -0.934513, 'label': 'negative'},\n", + " 'relevance': 0.577011,\n", + " 'emotion': {'sadness': 0.360925,\n", + " 'joy': 0.002355,\n", + " 'fear': 0.26649,\n", + " 'disgust': 0.069938,\n", + " 'anger': 0.442759},\n", " 'count': 1},\n", - " {'text': 'ice',\n", - " 'sentiment': {'score': 0, 'label': 'neutral'},\n", - " 'relevance': 0.54733,\n", - " 'emotion': {'sadness': 0.29279,\n", - " 'joy': 0.230829,\n", - " 'fear': 0.175379,\n", - " 'disgust': 0.180316,\n", - " 'anger': 0.225414},\n", + " {'text': 'year',\n", + " 'sentiment': {'score': -0.875215, 'label': 'negative'},\n", + " 'relevance': 0.563676,\n", + " 'emotion': {'sadness': 0.192383,\n", + " 'joy': 0.024961,\n", + " 'fear': 0.313145,\n", + " 'disgust': 0.08332,\n", + " 'anger': 0.277825},\n", " 'count': 1},\n", - " {'text': 'water',\n", + " {'text': 'time',\n", " 'sentiment': {'score': 0, 'label': 'neutral'},\n", - " 'relevance': 0.539073,\n", - " 'emotion': {'sadness': 0, 'joy': 0, 'fear': 0, 'disgust': 0, 'anger': 0},\n", 
- " 'count': 1},\n", - " {'text': 'name',\n", - " 'sentiment': {'score': -0.738776, 'label': 'negative'},\n", - " 'relevance': 0.539073,\n", - " 'emotion': {'sadness': 0.29279,\n", - " 'joy': 0.230829,\n", - " 'fear': 0.175379,\n", - " 'disgust': 0.180316,\n", - " 'anger': 0.225414},\n", + " 'relevance': 0.466983,\n", + " 'emotion': {'sadness': 0.266573,\n", + " 'joy': 0.401314,\n", + " 'fear': 0.08908,\n", + " 'disgust': 0.024027,\n", + " 'anger': 0.065767},\n", " 'count': 1}]" ] }, @@ -1613,29 +1549,21 @@ " 'entity_mentions': Empty DataFrame\n", " Columns: []\n", " Index: [],\n", - " 'keywords': text sentiment.label sentiment.score relevance emotion.sadness \\\n", - " 0 brick wall neutral 0.000000 0.929032 0.292790 \n", - " 1 true ride positive 0.863267 0.900583 0.219645 \n", - " 2 fire neutral 0.000000 0.678235 0.292790 \n", - " 3 toys negative -0.738776 0.632428 0.308881 \n", - " 4 toy negative -0.941767 0.567418 0.317770 \n", - " 5 glass neutral 0.000000 0.557307 0.292790 \n", - " 6 beast positive 0.863267 0.550077 0.219645 \n", - " 7 ice neutral 0.000000 0.547330 0.292790 \n", - " 8 water neutral 0.000000 0.539073 0.000000 \n", - " 9 name negative -0.738776 0.539073 0.292790 \n", + " 'keywords': text sentiment.label sentiment.score relevance emotion.sadness \\\n", + " 0 waste negative -0.875215 0.685741 0.192383 \n", + " 1 fire negative -0.934513 0.598326 0.360925 \n", + " 2 car negative -0.844774 0.581432 0.144346 \n", + " 3 hell negative -0.934513 0.577011 0.360925 \n", + " 4 year negative -0.875215 0.563676 0.192383 \n", + " 5 time neutral 0.000000 0.466983 0.266573 \n", " \n", " emotion.joy emotion.fear emotion.disgust emotion.anger count \n", - " 0 0.230829 0.175379 0.180316 0.225414 1 \n", - " 1 0.593742 0.087546 0.037523 0.088350 1 \n", - " 2 0.230829 0.175379 0.180316 0.225414 1 \n", - " 3 0.496260 0.157411 0.011575 0.090552 1 \n", - " 4 0.483067 0.163651 0.011704 0.094782 1 \n", - " 5 0.230829 0.175379 0.180316 0.225414 1 \n", - " 6 0.593742 0.087546 
0.037523 0.088350 1 \n", - " 7 0.230829 0.175379 0.180316 0.225414 1 \n", - " 8 0.000000 0.000000 0.000000 0.000000 1 \n", - " 9 0.230829 0.175379 0.180316 0.225414 1 ,\n", + " 0 0.024961 0.313145 0.083320 0.277825 1 \n", + " 1 0.002355 0.266490 0.069938 0.442759 1 \n", + " 2 0.150177 0.246102 0.061760 0.203999 2 \n", + " 3 0.002355 0.266490 0.069938 0.442759 1 \n", + " 4 0.024961 0.313145 0.083320 0.277825 1 \n", + " 5 0.401314 0.089080 0.024027 0.065767 1 ,\n", " 'relations': Empty DataFrame\n", " Columns: []\n", " Index: [],\n", @@ -1722,132 +1650,80 @@ " \n", " \n", " 0\n", - " brick wall\n", - " neutral\n", - " 0.000000\n", - " 0.929032\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", + " waste\n", + " negative\n", + " -0.875215\n", + " 0.685741\n", + " 0.192383\n", + " 0.024961\n", + " 0.313145\n", + " 0.083320\n", + " 0.277825\n", " 1\n", " \n", " \n", " 1\n", - " true ride\n", - " positive\n", - " 0.863267\n", - " 0.900583\n", - " 0.219645\n", - " 0.593742\n", - " 0.087546\n", - " 0.037523\n", - " 0.088350\n", + " fire\n", + " negative\n", + " -0.934513\n", + " 0.598326\n", + " 0.360925\n", + " 0.002355\n", + " 0.266490\n", + " 0.069938\n", + " 0.442759\n", " 1\n", " \n", " \n", " 2\n", - " fire\n", - " neutral\n", - " 0.000000\n", - " 0.678235\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", - " 1\n", + " car\n", + " negative\n", + " -0.844774\n", + " 0.581432\n", + " 0.144346\n", + " 0.150177\n", + " 0.246102\n", + " 0.061760\n", + " 0.203999\n", + " 2\n", " \n", " \n", " 3\n", - " toys\n", + " hell\n", " negative\n", - " -0.738776\n", - " 0.632428\n", - " 0.308881\n", - " 0.496260\n", - " 0.157411\n", - " 0.011575\n", - " 0.090552\n", + " -0.934513\n", + " 0.577011\n", + " 0.360925\n", + " 0.002355\n", + " 0.266490\n", + " 0.069938\n", + " 0.442759\n", " 1\n", " \n", " \n", " 4\n", - " toy\n", + " year\n", " negative\n", - " -0.941767\n", - " 0.567418\n", - " 0.317770\n", - " 
0.483067\n", - " 0.163651\n", - " 0.011704\n", - " 0.094782\n", + " -0.875215\n", + " 0.563676\n", + " 0.192383\n", + " 0.024961\n", + " 0.313145\n", + " 0.083320\n", + " 0.277825\n", " 1\n", " \n", " \n", " 5\n", - " glass\n", - " neutral\n", - " 0.000000\n", - " 0.557307\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", - " 1\n", - " \n", - " \n", - " 6\n", - " beast\n", - " positive\n", - " 0.863267\n", - " 0.550077\n", - " 0.219645\n", - " 0.593742\n", - " 0.087546\n", - " 0.037523\n", - " 0.088350\n", - " 1\n", - " \n", - " \n", - " 7\n", - " ice\n", - " neutral\n", - " 0.000000\n", - " 0.547330\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", - " 1\n", - " \n", - " \n", - " 8\n", - " water\n", + " time\n", " neutral\n", " 0.000000\n", - " 0.539073\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 1\n", - " \n", - " \n", - " 9\n", - " name\n", - " negative\n", - " -0.738776\n", - " 0.539073\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", + " 0.466983\n", + " 0.266573\n", + " 0.401314\n", + " 0.089080\n", + " 0.024027\n", + " 0.065767\n", " 1\n", " \n", " \n", @@ -1855,29 +1731,21 @@ "" ], "text/plain": [ - " text sentiment.label sentiment.score relevance emotion.sadness \\\n", - "0 brick wall neutral 0.000000 0.929032 0.292790 \n", - "1 true ride positive 0.863267 0.900583 0.219645 \n", - "2 fire neutral 0.000000 0.678235 0.292790 \n", - "3 toys negative -0.738776 0.632428 0.308881 \n", - "4 toy negative -0.941767 0.567418 0.317770 \n", - "5 glass neutral 0.000000 0.557307 0.292790 \n", - "6 beast positive 0.863267 0.550077 0.219645 \n", - "7 ice neutral 0.000000 0.547330 0.292790 \n", - "8 water neutral 0.000000 0.539073 0.000000 \n", - "9 name negative -0.738776 0.539073 0.292790 \n", + " text sentiment.label sentiment.score relevance emotion.sadness \\\n", + "0 waste negative -0.875215 0.685741 
0.192383 \n", + "1 fire negative -0.934513 0.598326 0.360925 \n", + "2 car negative -0.844774 0.581432 0.144346 \n", + "3 hell negative -0.934513 0.577011 0.360925 \n", + "4 year negative -0.875215 0.563676 0.192383 \n", + "5 time neutral 0.000000 0.466983 0.266573 \n", "\n", " emotion.joy emotion.fear emotion.disgust emotion.anger count \n", - "0 0.230829 0.175379 0.180316 0.225414 1 \n", - "1 0.593742 0.087546 0.037523 0.088350 1 \n", - "2 0.230829 0.175379 0.180316 0.225414 1 \n", - "3 0.496260 0.157411 0.011575 0.090552 1 \n", - "4 0.483067 0.163651 0.011704 0.094782 1 \n", - "5 0.230829 0.175379 0.180316 0.225414 1 \n", - "6 0.593742 0.087546 0.037523 0.088350 1 \n", - "7 0.230829 0.175379 0.180316 0.225414 1 \n", - "8 0.000000 0.000000 0.000000 0.000000 1 \n", - "9 0.230829 0.175379 0.180316 0.225414 1 " + "0 0.024961 0.313145 0.083320 0.277825 1 \n", + "1 0.002355 0.266490 0.069938 0.442759 1 \n", + "2 0.150177 0.246102 0.061760 0.203999 2 \n", + "3 0.002355 0.266490 0.069938 0.442759 1 \n", + "4 0.024961 0.313145 0.083320 0.277825 1 \n", + "5 0.401314 0.089080 0.024027 0.065767 1 " ] }, "execution_count": 18, @@ -1951,184 +1819,116 @@ " \n", " \n", " 0\n", - " brick wall\n", - " neutral\n", - " 0.000000\n", - " 0.929032\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", + " waste\n", + " negative\n", + " -0.875215\n", + " 0.685741\n", + " 0.192383\n", + " 0.024961\n", + " 0.313145\n", + " 0.083320\n", + " 0.277825\n", " 1\n", - " a true ride: this beast can go through just ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 1\n", - " true ride\n", - " positive\n", - " 0.863267\n", - " 0.900583\n", - " 0.219645\n", - " 0.593742\n", - " 0.087546\n", - " 0.037523\n", - " 0.088350\n", + " fire\n", + " negative\n", + " -0.934513\n", + " 0.598326\n", + " 0.360925\n", + " 0.002355\n", + " 0.266490\n", + " 0.069938\n", + " 0.442759\n", " 1\n", - " a true ride: this beast can go through just 
ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 2\n", - " fire\n", - " neutral\n", - " 0.000000\n", - " 0.678235\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", - " 1\n", - " a true ride: this beast can go through just ab...\n", + " car\n", + " negative\n", + " -0.844774\n", + " 0.581432\n", + " 0.144346\n", + " 0.150177\n", + " 0.246102\n", + " 0.061760\n", + " 0.203999\n", + " 2\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 3\n", - " toys\n", + " hell\n", " negative\n", - " -0.738776\n", - " 0.632428\n", - " 0.308881\n", - " 0.496260\n", - " 0.157411\n", - " 0.011575\n", - " 0.090552\n", + " -0.934513\n", + " 0.577011\n", + " 0.360925\n", + " 0.002355\n", + " 0.266490\n", + " 0.069938\n", + " 0.442759\n", " 1\n", - " a true ride: this beast can go through just ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 4\n", - " toy\n", + " year\n", " negative\n", - " -0.941767\n", - " 0.567418\n", - " 0.317770\n", - " 0.483067\n", - " 0.163651\n", - " 0.011704\n", - " 0.094782\n", + " -0.875215\n", + " 0.563676\n", + " 0.192383\n", + " 0.024961\n", + " 0.313145\n", + " 0.083320\n", + " 0.277825\n", " 1\n", - " a true ride: this beast can go through just ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 5\n", - " glass\n", - " neutral\n", - " 0.000000\n", - " 0.557307\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", - " 1\n", - " a true ride: this beast can go through just ab...\n", - " \n", - " \n", - " 6\n", - " beast\n", - " positive\n", - " 0.863267\n", - " 0.550077\n", - " 0.219645\n", - " 0.593742\n", - " 0.087546\n", - " 0.037523\n", - " 0.088350\n", - " 1\n", - " a true ride: this beast can go through just ab...\n", - " \n", - " \n", - " 7\n", - " ice\n", - " neutral\n", - " 0.000000\n", - " 0.547330\n", - " 0.292790\n", - " 0.230829\n", - " 
0.175379\n", - " 0.180316\n", - " 0.225414\n", - " 1\n", - " a true ride: this beast can go through just ab...\n", - " \n", - " \n", - " 8\n", - " water\n", + " time\n", " neutral\n", " 0.000000\n", - " 0.539073\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 1\n", - " a true ride: this beast can go through just ab...\n", - " \n", - " \n", - " 9\n", - " name\n", - " negative\n", - " -0.738776\n", - " 0.539073\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", + " 0.466983\n", + " 0.266573\n", + " 0.401314\n", + " 0.089080\n", + " 0.024027\n", + " 0.065767\n", " 1\n", - " a true ride: this beast can go through just ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " text sentiment.label sentiment.score relevance emotion.sadness \\\n", - "0 brick wall neutral 0.000000 0.929032 0.292790 \n", - "1 true ride positive 0.863267 0.900583 0.219645 \n", - "2 fire neutral 0.000000 0.678235 0.292790 \n", - "3 toys negative -0.738776 0.632428 0.308881 \n", - "4 toy negative -0.941767 0.567418 0.317770 \n", - "5 glass neutral 0.000000 0.557307 0.292790 \n", - "6 beast positive 0.863267 0.550077 0.219645 \n", - "7 ice neutral 0.000000 0.547330 0.292790 \n", - "8 water neutral 0.000000 0.539073 0.000000 \n", - "9 name negative -0.738776 0.539073 0.292790 \n", + " text sentiment.label sentiment.score relevance emotion.sadness \\\n", + "0 waste negative -0.875215 0.685741 0.192383 \n", + "1 fire negative -0.934513 0.598326 0.360925 \n", + "2 car negative -0.844774 0.581432 0.144346 \n", + "3 hell negative -0.934513 0.577011 0.360925 \n", + "4 year negative -0.875215 0.563676 0.192383 \n", + "5 time neutral 0.000000 0.466983 0.266573 \n", "\n", " emotion.joy emotion.fear emotion.disgust emotion.anger count \\\n", - "0 0.230829 0.175379 0.180316 0.225414 1 \n", - "1 0.593742 0.087546 0.037523 0.088350 1 \n", - "2 0.230829 0.175379 0.180316 
0.225414 1 \n", - "3 0.496260 0.157411 0.011575 0.090552 1 \n", - "4 0.483067 0.163651 0.011704 0.094782 1 \n", - "5 0.230829 0.175379 0.180316 0.225414 1 \n", - "6 0.593742 0.087546 0.037523 0.088350 1 \n", - "7 0.230829 0.175379 0.180316 0.225414 1 \n", - "8 0.000000 0.000000 0.000000 0.000000 1 \n", - "9 0.230829 0.175379 0.180316 0.225414 1 \n", + "0 0.024961 0.313145 0.083320 0.277825 1 \n", + "1 0.002355 0.266490 0.069938 0.442759 1 \n", + "2 0.150177 0.246102 0.061760 0.203999 2 \n", + "3 0.002355 0.266490 0.069938 0.442759 1 \n", + "4 0.024961 0.313145 0.083320 0.277825 1 \n", + "5 0.401314 0.089080 0.024027 0.065767 1 \n", "\n", " 0 \n", - "0 a true ride: this beast can go through just ab... \n", - "1 a true ride: this beast can go through just ab... \n", - "2 a true ride: this beast can go through just ab... \n", - "3 a true ride: this beast can go through just ab... \n", - "4 a true ride: this beast can go through just ab... \n", - "5 a true ride: this beast can go through just ab... \n", - "6 a true ride: this beast can go through just ab... \n", - "7 a true ride: this beast can go through just ab... \n", - "8 a true ride: this beast can go through just ab... \n", - "9 a true ride: this beast can go through just ab... " + "0 What a waste: I have owned this car for a year... \n", + "1 What a waste: I have owned this car for a year... \n", + "2 What a waste: I have owned this car for a year... \n", + "3 What a waste: I have owned this car for a year... \n", + "4 What a waste: I have owned this car for a year... \n", + "5 What a waste: I have owned this car for a year... 
" ] }, "execution_count": 19, @@ -2197,290 +1997,182 @@ " \n", " \n", " 0\n", - " brick wall\n", - " neutral\n", - " 0.000000\n", - " 0.929032\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", + " waste\n", + " negative\n", + " -0.875215\n", + " 0.685741\n", + " 0.192383\n", + " 0.024961\n", + " 0.313145\n", + " 0.083320\n", + " 0.277825\n", " 1\n", - " on 08/30/02 00:00 AM (PDT)\n", - " bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " this beast can go through just about \\ranythi...\n", - " 4.625\n", + " on 06/15/02 00:00 AM (PDT)\n", + " mike6382\n", + " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", + " What a waste\n", + " I have owned this car for a year and a \\rhalf...\n", + " 1.0\n", " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 1\n", - " true ride\n", - " positive\n", - " 0.863267\n", - " 0.900583\n", - " 0.219645\n", - " 0.593742\n", - " 0.087546\n", - " 0.037523\n", - " 0.088350\n", + " fire\n", + " negative\n", + " -0.934513\n", + " 0.598326\n", + " 0.360925\n", + " 0.002355\n", + " 0.266490\n", + " 0.069938\n", + " 0.442759\n", " 1\n", - " on 08/30/02 00:00 AM (PDT)\n", - " bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " this beast can go through just about \\ranythi...\n", - " 4.625\n", + " on 06/15/02 00:00 AM (PDT)\n", + " mike6382\n", + " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", + " What a waste\n", + " I have owned this car for a year and a \\rhalf...\n", + " 1.0\n", " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 2\n", - " fire\n", - " neutral\n", - " 0.000000\n", - " 0.678235\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", - " 1\n", - " on 08/30/02 00:00 AM (PDT)\n", - " 
bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " this beast can go through just about \\ranythi...\n", - " 4.625\n", + " car\n", + " negative\n", + " -0.844774\n", + " 0.581432\n", + " 0.144346\n", + " 0.150177\n", + " 0.246102\n", + " 0.061760\n", + " 0.203999\n", + " 2\n", + " on 06/15/02 00:00 AM (PDT)\n", + " mike6382\n", + " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", + " What a waste\n", + " I have owned this car for a year and a \\rhalf...\n", + " 1.0\n", " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 3\n", - " toys\n", + " hell\n", " negative\n", - " -0.738776\n", - " 0.632428\n", - " 0.308881\n", - " 0.496260\n", - " 0.157411\n", - " 0.011575\n", - " 0.090552\n", + " -0.934513\n", + " 0.577011\n", + " 0.360925\n", + " 0.002355\n", + " 0.266490\n", + " 0.069938\n", + " 0.442759\n", " 1\n", - " on 08/30/02 00:00 AM (PDT)\n", - " bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " this beast can go through just about \\ranythi...\n", - " 4.625\n", + " on 06/15/02 00:00 AM (PDT)\n", + " mike6382\n", + " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", + " What a waste\n", + " I have owned this car for a year and a \\rhalf...\n", + " 1.0\n", " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 4\n", - " toy\n", + " year\n", " negative\n", - " -0.941767\n", - " 0.567418\n", - " 0.317770\n", - " 0.483067\n", - " 0.163651\n", - " 0.011704\n", - " 0.094782\n", + " -0.875215\n", + " 0.563676\n", + " 0.192383\n", + " 0.024961\n", + " 0.313145\n", + " 0.083320\n", + " 0.277825\n", " 1\n", - " on 08/30/02 00:00 AM (PDT)\n", - " bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " this beast can go through just about \\ranythi...\n", - " 4.625\n", + " on 06/15/02 
00:00 AM (PDT)\n", + " mike6382\n", + " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", + " What a waste\n", + " I have owned this car for a year and a \\rhalf...\n", + " 1.0\n", " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 5\n", - " glass\n", - " neutral\n", - " 0.000000\n", - " 0.557307\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", - " 1\n", - " on 08/30/02 00:00 AM (PDT)\n", - " bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " this beast can go through just about \\ranythi...\n", - " 4.625\n", - " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", - " \n", - " \n", - " 6\n", - " beast\n", - " positive\n", - " 0.863267\n", - " 0.550077\n", - " 0.219645\n", - " 0.593742\n", - " 0.087546\n", - " 0.037523\n", - " 0.088350\n", - " 1\n", - " on 08/30/02 00:00 AM (PDT)\n", - " bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " this beast can go through just about \\ranythi...\n", - " 4.625\n", - " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", - " \n", - " \n", - " 7\n", - " ice\n", - " neutral\n", - " 0.000000\n", - " 0.547330\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", - " 1\n", - " on 08/30/02 00:00 AM (PDT)\n", - " bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " this beast can go through just about \\ranythi...\n", - " 4.625\n", - " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", - " \n", - " \n", - " 8\n", - " water\n", + " time\n", " neutral\n", " 0.000000\n", - " 0.539073\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 1\n", - " on 08/30/02 00:00 AM (PDT)\n", - " bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " 
this beast can go through just about \\ranythi...\n", - " 4.625\n", - " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", - " \n", - " \n", - " 9\n", - " name\n", - " negative\n", - " -0.738776\n", - " 0.539073\n", - " 0.292790\n", - " 0.230829\n", - " 0.175379\n", - " 0.180316\n", - " 0.225414\n", + " 0.466983\n", + " 0.266573\n", + " 0.401314\n", + " 0.089080\n", + " 0.024027\n", + " 0.065767\n", " 1\n", - " on 08/30/02 00:00 AM (PDT)\n", - " bluice3309\n", - " 2000 AM General Hummer SUV 4dr SUV AWD\n", - " a true ride\n", - " this beast can go through just about \\ranythi...\n", - " 4.625\n", + " on 06/15/02 00:00 AM (PDT)\n", + " mike6382\n", + " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", + " What a waste\n", + " I have owned this car for a year and a \\rhalf...\n", + " 1.0\n", " AMGeneral\n", - " a true ride: this beast can go through just ab...\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " text sentiment.label sentiment.score relevance emotion.sadness \\\n", - "0 brick wall neutral 0.000000 0.929032 0.292790 \n", - "1 true ride positive 0.863267 0.900583 0.219645 \n", - "2 fire neutral 0.000000 0.678235 0.292790 \n", - "3 toys negative -0.738776 0.632428 0.308881 \n", - "4 toy negative -0.941767 0.567418 0.317770 \n", - "5 glass neutral 0.000000 0.557307 0.292790 \n", - "6 beast positive 0.863267 0.550077 0.219645 \n", - "7 ice neutral 0.000000 0.547330 0.292790 \n", - "8 water neutral 0.000000 0.539073 0.000000 \n", - "9 name negative -0.738776 0.539073 0.292790 \n", + " text sentiment.label sentiment.score relevance emotion.sadness \\\n", + "0 waste negative -0.875215 0.685741 0.192383 \n", + "1 fire negative -0.934513 0.598326 0.360925 \n", + "2 car negative -0.844774 0.581432 0.144346 \n", + "3 hell negative -0.934513 0.577011 0.360925 \n", + "4 year negative -0.875215 0.563676 0.192383 \n", + "5 time neutral 0.000000 0.466983 0.266573 \n", "\n", " emotion.joy 
emotion.fear emotion.disgust emotion.anger count \\\n", - "0 0.230829 0.175379 0.180316 0.225414 1 \n", - "1 0.593742 0.087546 0.037523 0.088350 1 \n", - "2 0.230829 0.175379 0.180316 0.225414 1 \n", - "3 0.496260 0.157411 0.011575 0.090552 1 \n", - "4 0.483067 0.163651 0.011704 0.094782 1 \n", - "5 0.230829 0.175379 0.180316 0.225414 1 \n", - "6 0.593742 0.087546 0.037523 0.088350 1 \n", - "7 0.230829 0.175379 0.180316 0.225414 1 \n", - "8 0.000000 0.000000 0.000000 0.000000 1 \n", - "9 0.230829 0.175379 0.180316 0.225414 1 \n", + "0 0.024961 0.313145 0.083320 0.277825 1 \n", + "1 0.002355 0.266490 0.069938 0.442759 1 \n", + "2 0.150177 0.246102 0.061760 0.203999 2 \n", + "3 0.002355 0.266490 0.069938 0.442759 1 \n", + "4 0.024961 0.313145 0.083320 0.277825 1 \n", + "5 0.401314 0.089080 0.024027 0.065767 1 \n", "\n", - " Review_Date Author_Name \\\n", - "0 on 08/30/02 00:00 AM (PDT) bluice3309 \n", - "1 on 08/30/02 00:00 AM (PDT) bluice3309 \n", - "2 on 08/30/02 00:00 AM (PDT) bluice3309 \n", - "3 on 08/30/02 00:00 AM (PDT) bluice3309 \n", - "4 on 08/30/02 00:00 AM (PDT) bluice3309 \n", - "5 on 08/30/02 00:00 AM (PDT) bluice3309 \n", - "6 on 08/30/02 00:00 AM (PDT) bluice3309 \n", - "7 on 08/30/02 00:00 AM (PDT) bluice3309 \n", - "8 on 08/30/02 00:00 AM (PDT) bluice3309 \n", - "9 on 08/30/02 00:00 AM (PDT) bluice3309 \n", + " Review_Date Author_Name \\\n", + "0 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "1 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "2 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "3 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "4 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "5 on 06/15/02 00:00 AM (PDT) mike6382 \n", "\n", - " Vehicle_Title Review_Title \\\n", - "0 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", - "1 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", - "2 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", - "3 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", - "4 2000 AM General Hummer SUV 4dr SUV AWD a true ride 
\n", - "5 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", - "6 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", - "7 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", - "8 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", - "9 2000 AM General Hummer SUV 4dr SUV AWD a true ride \n", + " Vehicle_Title Review_Title \\\n", + "0 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + "1 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + "2 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + "3 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + "4 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + "5 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", "\n", " Review Rating\\r Car_Make \\\n", - "0 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", - "1 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", - "2 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", - "3 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", - "4 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", - "5 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", - "6 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", - "7 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", - "8 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", - "9 this beast can go through just about \\ranythi... 4.625 AMGeneral \n", + "0 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + "1 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + "2 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + "3 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + "4 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + "5 I have owned this car for a year and a \\rhalf... 
1.0 AMGeneral \n", "\n", " Review_Content \n", - "0 a true ride: this beast can go through just ab... \n", - "1 a true ride: this beast can go through just ab... \n", - "2 a true ride: this beast can go through just ab... \n", - "3 a true ride: this beast can go through just ab... \n", - "4 a true ride: this beast can go through just ab... \n", - "5 a true ride: this beast can go through just ab... \n", - "6 a true ride: this beast can go through just ab... \n", - "7 a true ride: this beast can go through just ab... \n", - "8 a true ride: this beast can go through just ab... \n", - "9 a true ride: this beast can go through just ab... " + "0 What a waste: I have owned this car for a year... \n", + "1 What a waste: I have owned this car for a year... \n", + "2 What a waste: I have owned this car for a year... \n", + "3 What a waste: I have owned this car for a year... \n", + "4 What a waste: I have owned this car for a year... \n", + "5 What a waste: I have owned this car for a year... " ] }, "execution_count": 20, @@ -2578,370 +2270,370 @@ " \n", " \n", " \n", - " 0\n", - " count\n", - " emotion.anger\n", - " emotion.disgust\n", - " emotion.fear\n", - " emotion.joy\n", - " emotion.sadness\n", - " relevance\n", + " text\n", " sentiment.label\n", " sentiment.score\n", - " text\n", + " relevance\n", + " emotion.sadness\n", + " emotion.joy\n", + " emotion.fear\n", + " emotion.disgust\n", + " emotion.anger\n", + " count\n", + " 0\n", " \n", " \n", " \n", " \n", " 0\n", - " a true ride: this beast can go through just ab...\n", + " waste\n", + " negative\n", + " -0.875215\n", + " 0.685741\n", + " 0.192383\n", + " 0.024961\n", + " 0.313145\n", + " 0.083320\n", + " 0.277825\n", " 1.0\n", - " 0.225414\n", - " 0.180316\n", - " 0.175379\n", - " 0.230829\n", - " 0.292790\n", - " 0.929032\n", - " neutral\n", - " 0.000000\n", - " brick wall\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 1\n", - " a true ride: this beast can go through just ab...\n", 
+ " fire\n", + " negative\n", + " -0.934513\n", + " 0.598326\n", + " 0.360925\n", + " 0.002355\n", + " 0.266490\n", + " 0.069938\n", + " 0.442759\n", " 1.0\n", - " 0.088350\n", - " 0.037523\n", - " 0.087546\n", - " 0.593742\n", - " 0.219645\n", - " 0.900583\n", - " positive\n", - " 0.863267\n", - " true ride\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 2\n", - " a true ride: this beast can go through just ab...\n", - " 1.0\n", - " 0.225414\n", - " 0.180316\n", - " 0.175379\n", - " 0.230829\n", - " 0.292790\n", - " 0.678235\n", - " neutral\n", - " 0.000000\n", - " fire\n", + " car\n", + " negative\n", + " -0.844774\n", + " 0.581432\n", + " 0.144346\n", + " 0.150177\n", + " 0.246102\n", + " 0.061760\n", + " 0.203999\n", + " 2.0\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 3\n", - " a true ride: this beast can go through just ab...\n", - " 1.0\n", - " 0.090552\n", - " 0.011575\n", - " 0.157411\n", - " 0.496260\n", - " 0.308881\n", - " 0.632428\n", + " hell\n", " negative\n", - " -0.738776\n", - " toys\n", + " -0.934513\n", + " 0.577011\n", + " 0.360925\n", + " 0.002355\n", + " 0.266490\n", + " 0.069938\n", + " 0.442759\n", + " 1.0\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 4\n", - " a true ride: this beast can go through just ab...\n", - " 1.0\n", - " 0.094782\n", - " 0.011704\n", - " 0.163651\n", - " 0.483067\n", - " 0.317770\n", - " 0.567418\n", + " year\n", " negative\n", - " -0.941767\n", - " toy\n", + " -0.875215\n", + " 0.563676\n", + " 0.192383\n", + " 0.024961\n", + " 0.313145\n", + " 0.083320\n", + " 0.277825\n", + " 1.0\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", " 5\n", - " a true ride: this beast can go through just ab...\n", - " 1.0\n", - " 0.225414\n", - " 0.180316\n", - " 0.175379\n", - " 0.230829\n", - " 0.292790\n", - " 0.557307\n", + " time\n", " neutral\n", " 0.000000\n", - " glass\n", + " 0.466983\n", + " 0.266573\n", 
+ " 0.401314\n", + " 0.089080\n", + " 0.024027\n", + " 0.065767\n", + " 1.0\n", + " What a waste: I have owned this car for a year...\n", " \n", " \n", - " 6\n", - " a true ride: this beast can go through just ab...\n", + " 0\n", + " Top speed\n", + " negative\n", + " -0.537564\n", + " 0.881037\n", + " 0.509224\n", + " 0.199172\n", + " 0.038777\n", + " 0.065161\n", + " 0.044472\n", " 1.0\n", - " 0.088350\n", - " 0.037523\n", - " 0.087546\n", - " 0.593742\n", - " 0.219645\n", - " 0.550077\n", - " positive\n", - " 0.863267\n", - " beast\n", + " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", " \n", " \n", - " 7\n", - " a true ride: this beast can go through just ab...\n", + " 1\n", + " OK cause\n", + " positive\n", + " 0.647515\n", + " 0.786985\n", + " 0.063022\n", + " 0.432975\n", + " 0.107965\n", + " 0.016918\n", + " 0.090944\n", " 1.0\n", - " 0.225414\n", - " 0.180316\n", - " 0.175379\n", - " 0.230829\n", - " 0.292790\n", - " 0.547330\n", - " neutral\n", - " 0.000000\n", - " ice\n", + " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", " \n", " \n", - " 8\n", - " a true ride: this beast can go through just ab...\n", - " 1.0\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.539073\n", + " 2\n", + " HUMMER H\n", " neutral\n", " 0.000000\n", - " water\n", + " 0.639671\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 1.0\n", + " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", " \n", " \n", - " 9\n", - " a true ride: this beast can go through just ab...\n", - " 1.0\n", - " 0.225414\n", - " 0.180316\n", - " 0.175379\n", - " 0.230829\n", - " 0.292790\n", - " 0.539073\n", + " 3\n", + " seat cushion\n", " negative\n", - " -0.738776\n", - " name\n", + " -0.537564\n", + " 0.593566\n", + " 0.509224\n", + " 0.199172\n", + " 0.038777\n", + " 0.065161\n", + " 0.044472\n", + " 1.0\n", + " HUMMER NOT A bummer : Vehicle is a beast. 
I do...\n", " \n", " \n", - " 0\n", - " What a waste: I have owned this car for a year...\n", - " 1.0\n", - " 0.457683\n", - " 0.244124\n", - " 0.130232\n", - " 0.015328\n", - " 0.494477\n", - " 0.685741\n", + " 4\n", + " HUMMER\n", " negative\n", - " -0.875214\n", - " waste\n", + " -0.913874\n", + " 0.582162\n", + " 0.172604\n", + " 0.221188\n", + " 0.146469\n", + " 0.022002\n", + " 0.029588\n", + " 1.0\n", + " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", " \n", " \n", - " 1\n", - " What a waste: I have owned this car for a year...\n", + " 5\n", + " speed\n", + " positive\n", + " 0.305110\n", + " 0.548092\n", + " 0.286123\n", + " 0.316074\n", + " 0.073371\n", + " 0.041039\n", + " 0.067708\n", " 1.0\n", - " 0.665425\n", - " 0.238400\n", - " 0.116199\n", - " 0.010869\n", - " 0.283251\n", - " 0.598326\n", - " negative\n", - " -0.934512\n", - " fire\n", + " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", " \n", " \n", - " 2\n", - " What a waste: I have owned this car for a year...\n", - " 2.0\n", - " 0.488137\n", - " 0.223132\n", - " 0.195960\n", - " 0.014601\n", - " 0.407834\n", - " 0.581432\n", + " 6\n", + " thing\n", " negative\n", - " -0.875214\n", - " car\n", + " -0.949193\n", + " 0.534867\n", + " 0.645165\n", + " 0.010028\n", + " 0.216581\n", + " 0.022772\n", + " 0.055427\n", + " 1.0\n", + " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", " \n", " \n", - " 3\n", - " What a waste: I have owned this car for a year...\n", + " 7\n", + " Vehicle\n", + " negative\n", + " -0.961235\n", + " 0.531410\n", + " 0.180207\n", + " 0.061332\n", + " 0.192902\n", + " 0.008274\n", + " 0.046232\n", " 1.0\n", - " 0.665425\n", - " 0.238400\n", - " 0.116199\n", - " 0.010869\n", - " 0.283251\n", - " 0.577011\n", - " neutral\n", - " 0.000000\n", - " hell\n", + " HUMMER NOT A bummer : Vehicle is a beast. 
I do...\n", " \n", " \n", - " 4\n", - " What a waste: I have owned this car for a year...\n", - " 1.0\n", - " 0.457683\n", - " 0.244124\n", - " 0.130232\n", - " 0.015328\n", - " 0.494477\n", - " 0.563676\n", + " 8\n", + " beast\n", " negative\n", - " -0.875214\n", - " year\n", + " -0.961235\n", + " 0.524488\n", + " 0.180207\n", + " 0.061332\n", + " 0.192902\n", + " 0.008274\n", + " 0.046232\n", + " 1.0\n", + " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", " \n", " \n", - " 5\n", - " What a waste: I have owned this car for a year...\n", + " 9\n", + " thats\n", + " positive\n", + " 0.647515\n", + " 0.462435\n", + " 0.063022\n", + " 0.432975\n", + " 0.107965\n", + " 0.016918\n", + " 0.090944\n", " 1.0\n", - " 0.345149\n", - " 0.351359\n", - " 0.333530\n", - " 0.024940\n", - " 0.245249\n", - " 0.475221\n", - " negative\n", - " -0.665741\n", - " reliabile\n", + " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", " \n", " \n", - " 6\n", - " What a waste: I have owned this car for a year...\n", - " 1.0\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.475221\n", + " 10\n", + " bummer\n", " negative\n", - " -0.813165\n", - " time\n", + " -0.961235\n", + " 0.360189\n", + " 0.180207\n", + " 0.061332\n", + " 0.192902\n", + " 0.008274\n", + " 0.046232\n", + " 1.0\n", + " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", " \n", " \n", - " 7\n", - " What a waste: I have owned this car for a year...\n", + " 11\n", + " average\n", + " negative\n", + " -0.857270\n", + " 0.341687\n", + " 0.165002\n", + " 0.381043\n", + " 0.100036\n", + " 0.035730\n", + " 0.012944\n", " 1.0\n", - " 0.665425\n", - " 0.238400\n", - " 0.116199\n", - " 0.010869\n", - " 0.283251\n", - " 0.475221\n", - " neutral\n", - " 0.000000\n", - " wont\n", + " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", " \n", " \n", " 0\n", - " HUMMER NOT A bummer : Vehicle is a beast. 
I do...\n", + " AWESOME HUMMER\n", + " positive\n", + " 0.734682\n", + " 0.833177\n", + " 0.032499\n", + " 0.493942\n", + " 0.116809\n", + " 0.009257\n", + " 0.024046\n", " 1.0\n", - " 0.134550\n", - " 0.289156\n", - " 0.079885\n", - " 0.033319\n", - " 0.170084\n", - " 0.881037\n", - " negative\n", - " -0.537564\n", - " Top speed\n", + " AWESOME HUMMER: Hummer is unstoppable. May onl...\n", " \n", " \n", " 1\n", - " HUMMER NOT A bummer : Vehicle is a beast. I do...\n", + " mph\n", + " neutral\n", + " 0.000000\n", + " 0.635404\n", + " 0.499977\n", + " 0.151388\n", + " 0.039640\n", + " 0.036049\n", + " 0.064654\n", " 1.0\n", - " 0.053211\n", - " 0.021603\n", - " 0.467839\n", - " 0.038405\n", - " 0.115986\n", - " 0.790468\n", - " negative\n", - " -0.857269\n", - " HUMMER H1\n", + " AWESOME HUMMER: Hummer is unstoppable. May onl...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " 0 count emotion.anger \\\n", - "0 a true ride: this beast can go through just ab... 1.0 0.225414 \n", - "1 a true ride: this beast can go through just ab... 1.0 0.088350 \n", - "2 a true ride: this beast can go through just ab... 1.0 0.225414 \n", - "3 a true ride: this beast can go through just ab... 1.0 0.090552 \n", - "4 a true ride: this beast can go through just ab... 1.0 0.094782 \n", - "5 a true ride: this beast can go through just ab... 1.0 0.225414 \n", - "6 a true ride: this beast can go through just ab... 1.0 0.088350 \n", - "7 a true ride: this beast can go through just ab... 1.0 0.225414 \n", - "8 a true ride: this beast can go through just ab... 1.0 0.000000 \n", - "9 a true ride: this beast can go through just ab... 1.0 0.225414 \n", - "0 What a waste: I have owned this car for a year... 1.0 0.457683 \n", - "1 What a waste: I have owned this car for a year... 1.0 0.665425 \n", - "2 What a waste: I have owned this car for a year... 2.0 0.488137 \n", - "3 What a waste: I have owned this car for a year... 1.0 0.665425 \n", - "4 What a waste: I have owned this car for a year... 
1.0 0.457683 \n", - "5 What a waste: I have owned this car for a year... 1.0 0.345149 \n", - "6 What a waste: I have owned this car for a year... 1.0 0.000000 \n", - "7 What a waste: I have owned this car for a year... 1.0 0.665425 \n", - "0 HUMMER NOT A bummer : Vehicle is a beast. I do... 1.0 0.134550 \n", - "1 HUMMER NOT A bummer : Vehicle is a beast. I do... 1.0 0.053211 \n", + " text sentiment.label sentiment.score relevance \\\n", + "0 waste negative -0.875215 0.685741 \n", + "1 fire negative -0.934513 0.598326 \n", + "2 car negative -0.844774 0.581432 \n", + "3 hell negative -0.934513 0.577011 \n", + "4 year negative -0.875215 0.563676 \n", + "5 time neutral 0.000000 0.466983 \n", + "0 Top speed negative -0.537564 0.881037 \n", + "1 OK cause positive 0.647515 0.786985 \n", + "2 HUMMER H neutral 0.000000 0.639671 \n", + "3 seat cushion negative -0.537564 0.593566 \n", + "4 HUMMER negative -0.913874 0.582162 \n", + "5 speed positive 0.305110 0.548092 \n", + "6 thing negative -0.949193 0.534867 \n", + "7 Vehicle negative -0.961235 0.531410 \n", + "8 beast negative -0.961235 0.524488 \n", + "9 thats positive 0.647515 0.462435 \n", + "10 bummer negative -0.961235 0.360189 \n", + "11 average negative -0.857270 0.341687 \n", + "0 AWESOME HUMMER positive 0.734682 0.833177 \n", + "1 mph neutral 0.000000 0.635404 \n", "\n", - " emotion.disgust emotion.fear emotion.joy emotion.sadness relevance \\\n", - "0 0.180316 0.175379 0.230829 0.292790 0.929032 \n", - "1 0.037523 0.087546 0.593742 0.219645 0.900583 \n", - "2 0.180316 0.175379 0.230829 0.292790 0.678235 \n", - "3 0.011575 0.157411 0.496260 0.308881 0.632428 \n", - "4 0.011704 0.163651 0.483067 0.317770 0.567418 \n", - "5 0.180316 0.175379 0.230829 0.292790 0.557307 \n", - "6 0.037523 0.087546 0.593742 0.219645 0.550077 \n", - "7 0.180316 0.175379 0.230829 0.292790 0.547330 \n", - "8 0.000000 0.000000 0.000000 0.000000 0.539073 \n", - "9 0.180316 0.175379 0.230829 0.292790 0.539073 \n", - "0 0.244124 0.130232 
0.015328 0.494477 0.685741 \n", - "1 0.238400 0.116199 0.010869 0.283251 0.598326 \n", - "2 0.223132 0.195960 0.014601 0.407834 0.581432 \n", - "3 0.238400 0.116199 0.010869 0.283251 0.577011 \n", - "4 0.244124 0.130232 0.015328 0.494477 0.563676 \n", - "5 0.351359 0.333530 0.024940 0.245249 0.475221 \n", - "6 0.000000 0.000000 0.000000 0.000000 0.475221 \n", - "7 0.238400 0.116199 0.010869 0.283251 0.475221 \n", - "0 0.289156 0.079885 0.033319 0.170084 0.881037 \n", - "1 0.021603 0.467839 0.038405 0.115986 0.790468 \n", + " emotion.sadness emotion.joy emotion.fear emotion.disgust \\\n", + "0 0.192383 0.024961 0.313145 0.083320 \n", + "1 0.360925 0.002355 0.266490 0.069938 \n", + "2 0.144346 0.150177 0.246102 0.061760 \n", + "3 0.360925 0.002355 0.266490 0.069938 \n", + "4 0.192383 0.024961 0.313145 0.083320 \n", + "5 0.266573 0.401314 0.089080 0.024027 \n", + "0 0.509224 0.199172 0.038777 0.065161 \n", + "1 0.063022 0.432975 0.107965 0.016918 \n", + "2 NaN NaN NaN NaN \n", + "3 0.509224 0.199172 0.038777 0.065161 \n", + "4 0.172604 0.221188 0.146469 0.022002 \n", + "5 0.286123 0.316074 0.073371 0.041039 \n", + "6 0.645165 0.010028 0.216581 0.022772 \n", + "7 0.180207 0.061332 0.192902 0.008274 \n", + "8 0.180207 0.061332 0.192902 0.008274 \n", + "9 0.063022 0.432975 0.107965 0.016918 \n", + "10 0.180207 0.061332 0.192902 0.008274 \n", + "11 0.165002 0.381043 0.100036 0.035730 \n", + "0 0.032499 0.493942 0.116809 0.009257 \n", + "1 0.499977 0.151388 0.039640 0.036049 \n", "\n", - " sentiment.label sentiment.score text \n", - "0 neutral 0.000000 brick wall \n", - "1 positive 0.863267 true ride \n", - "2 neutral 0.000000 fire \n", - "3 negative -0.738776 toys \n", - "4 negative -0.941767 toy \n", - "5 neutral 0.000000 glass \n", - "6 positive 0.863267 beast \n", - "7 neutral 0.000000 ice \n", - "8 neutral 0.000000 water \n", - "9 negative -0.738776 name \n", - "0 negative -0.875214 waste \n", - "1 negative -0.934512 fire \n", - "2 negative -0.875214 car \n", - "3 
neutral 0.000000 hell \n", - "4 negative -0.875214 year \n", - "5 negative -0.665741 reliabile \n", - "6 negative -0.813165 time \n", - "7 neutral 0.000000 wont \n", - "0 negative -0.537564 Top speed \n", - "1 negative -0.857269 HUMMER H1 " + " emotion.anger count 0 \n", + "0 0.277825 1.0 What a waste: I have owned this car for a year... \n", + "1 0.442759 1.0 What a waste: I have owned this car for a year... \n", + "2 0.203999 2.0 What a waste: I have owned this car for a year... \n", + "3 0.442759 1.0 What a waste: I have owned this car for a year... \n", + "4 0.277825 1.0 What a waste: I have owned this car for a year... \n", + "5 0.065767 1.0 What a waste: I have owned this car for a year... \n", + "0 0.044472 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "1 0.090944 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "2 NaN 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "3 0.044472 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "4 0.029588 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "5 0.067708 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "6 0.055427 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "7 0.046232 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "8 0.046232 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "9 0.090944 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "10 0.046232 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "11 0.012944 1.0 HUMMER NOT A bummer : Vehicle is a beast. I do... \n", + "0 0.024046 1.0 AWESOME HUMMER: Hummer is unstoppable. May onl... \n", + "1 0.064654 1.0 AWESOME HUMMER: Hummer is unstoppable. May onl... 
" ] }, "execution_count": 23, @@ -3002,16 +2694,16 @@ " \n", " \n", " \n", - " count\n", - " emotion.anger\n", - " emotion.disgust\n", - " emotion.fear\n", - " emotion.joy\n", - " emotion.sadness\n", - " relevance\n", + " text\n", " sentiment.label\n", " sentiment.score\n", - " text\n", + " relevance\n", + " emotion.sadness\n", + " emotion.joy\n", + " emotion.fear\n", + " emotion.disgust\n", + " emotion.anger\n", + " count\n", " Review_Date\n", " Author_Name\n", " Vehicle_Title\n", @@ -3024,59 +2716,17 @@ " \n", " \n", " \n", - " 10\n", - " 1.0\n", - " 0.457683\n", - " 0.244124\n", - " 0.130232\n", - " 0.015328\n", - " 0.494477\n", - " 0.685741\n", - " negative\n", - " -0.875214\n", + " 0\n", " waste\n", - " on 06/15/02 00:00 AM (PDT)\n", - " mike6382\n", - " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", - " What a waste\n", - " I have owned this car for a year and a \\rhalf...\n", - " 1.0\n", - " AMGeneral\n", - " What a waste: I have owned this car for a year...\n", - " \n", - " \n", - " 11\n", - " 1.0\n", - " 0.665425\n", - " 0.238400\n", - " 0.116199\n", - " 0.010869\n", - " 0.283251\n", - " 0.598326\n", " negative\n", - " -0.934512\n", - " fire\n", - " on 06/15/02 00:00 AM (PDT)\n", - " mike6382\n", - " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", - " What a waste\n", - " I have owned this car for a year and a \\rhalf...\n", + " -0.875215\n", + " 0.685741\n", + " 0.192383\n", + " 0.024961\n", + " 0.313145\n", + " 0.083320\n", + " 0.277825\n", " 1.0\n", - " AMGeneral\n", - " What a waste: I have owned this car for a year...\n", - " \n", - " \n", - " 12\n", - " 2.0\n", - " 0.488137\n", - " 0.223132\n", - " 0.195960\n", - " 0.014601\n", - " 0.407834\n", - " 0.581432\n", - " negative\n", - " -0.875214\n", - " car\n", " on 06/15/02 00:00 AM (PDT)\n", " mike6382\n", " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", @@ -3087,17 +2737,17 @@ " What a waste: I have owned this car for a year...\n", " \n", " \n", - " 13\n", + " 1\n", + " fire\n", + " 
negative\n", + " -0.934513\n", + " 0.598326\n", + " 0.360925\n", + " 0.002355\n", + " 0.266490\n", + " 0.069938\n", + " 0.442759\n", " 1.0\n", - " 0.665425\n", - " 0.238400\n", - " 0.116199\n", - " 0.010869\n", - " 0.283251\n", - " 0.577011\n", - " neutral\n", - " 0.000000\n", - " hell\n", " on 06/15/02 00:00 AM (PDT)\n", " mike6382\n", " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", @@ -3108,17 +2758,17 @@ " What a waste: I have owned this car for a year...\n", " \n", " \n", - " 14\n", - " 1.0\n", - " 0.457683\n", - " 0.244124\n", - " 0.130232\n", - " 0.015328\n", - " 0.494477\n", - " 0.563676\n", + " 2\n", + " car\n", " negative\n", - " -0.875214\n", - " year\n", + " -0.844774\n", + " 0.581432\n", + " 0.144346\n", + " 0.150177\n", + " 0.246102\n", + " 0.061760\n", + " 0.203999\n", + " 2.0\n", " on 06/15/02 00:00 AM (PDT)\n", " mike6382\n", " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", @@ -3129,17 +2779,17 @@ " What a waste: I have owned this car for a year...\n", " \n", " \n", - " 15\n", - " 1.0\n", - " 0.345149\n", - " 0.351359\n", - " 0.333530\n", - " 0.024940\n", - " 0.245249\n", - " 0.475221\n", + " 3\n", + " hell\n", " negative\n", - " -0.665741\n", - " reliabile\n", + " -0.934513\n", + " 0.577011\n", + " 0.360925\n", + " 0.002355\n", + " 0.266490\n", + " 0.069938\n", + " 0.442759\n", + " 1.0\n", " on 06/15/02 00:00 AM (PDT)\n", " mike6382\n", " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", @@ -3150,17 +2800,17 @@ " What a waste: I have owned this car for a year...\n", " \n", " \n", - " 16\n", - " 1.0\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 0.475221\n", + " 4\n", + " year\n", " negative\n", - " -0.813165\n", - " time\n", + " -0.875215\n", + " 0.563676\n", + " 0.192383\n", + " 0.024961\n", + " 0.313145\n", + " 0.083320\n", + " 0.277825\n", + " 1.0\n", " on 06/15/02 00:00 AM (PDT)\n", " mike6382\n", " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", @@ -3171,17 +2821,17 @@ " What a 
waste: I have owned this car for a year...\n", " \n", " \n", - " 17\n", - " 1.0\n", - " 0.665425\n", - " 0.238400\n", - " 0.116199\n", - " 0.010869\n", - " 0.283251\n", - " 0.475221\n", + " 5\n", + " time\n", " neutral\n", " 0.000000\n", - " wont\n", + " 0.466983\n", + " 0.266573\n", + " 0.401314\n", + " 0.089080\n", + " 0.024027\n", + " 0.065767\n", + " 1.0\n", " on 06/15/02 00:00 AM (PDT)\n", " mike6382\n", " 2000 AM General Hummer SUV Hard Top 4dr SUV AWD\n", @@ -3196,65 +2846,53 @@ "" ], "text/plain": [ - " count emotion.anger emotion.disgust emotion.fear emotion.joy \\\n", - "10 1.0 0.457683 0.244124 0.130232 0.015328 \n", - "11 1.0 0.665425 0.238400 0.116199 0.010869 \n", - "12 2.0 0.488137 0.223132 0.195960 0.014601 \n", - "13 1.0 0.665425 0.238400 0.116199 0.010869 \n", - "14 1.0 0.457683 0.244124 0.130232 0.015328 \n", - "15 1.0 0.345149 0.351359 0.333530 0.024940 \n", - "16 1.0 0.000000 0.000000 0.000000 0.000000 \n", - "17 1.0 0.665425 0.238400 0.116199 0.010869 \n", + " text sentiment.label sentiment.score relevance emotion.sadness \\\n", + "0 waste negative -0.875215 0.685741 0.192383 \n", + "1 fire negative -0.934513 0.598326 0.360925 \n", + "2 car negative -0.844774 0.581432 0.144346 \n", + "3 hell negative -0.934513 0.577011 0.360925 \n", + "4 year negative -0.875215 0.563676 0.192383 \n", + "5 time neutral 0.000000 0.466983 0.266573 \n", "\n", - " emotion.sadness relevance sentiment.label sentiment.score text \\\n", - "10 0.494477 0.685741 negative -0.875214 waste \n", - "11 0.283251 0.598326 negative -0.934512 fire \n", - "12 0.407834 0.581432 negative -0.875214 car \n", - "13 0.283251 0.577011 neutral 0.000000 hell \n", - "14 0.494477 0.563676 negative -0.875214 year \n", - "15 0.245249 0.475221 negative -0.665741 reliabile \n", - "16 0.000000 0.475221 negative -0.813165 time \n", - "17 0.283251 0.475221 neutral 0.000000 wont \n", + " emotion.joy emotion.fear emotion.disgust emotion.anger count \\\n", + "0 0.024961 0.313145 0.083320 0.277825 1.0 
\n", + "1 0.002355 0.266490 0.069938 0.442759 1.0 \n", + "2 0.150177 0.246102 0.061760 0.203999 2.0 \n", + "3 0.002355 0.266490 0.069938 0.442759 1.0 \n", + "4 0.024961 0.313145 0.083320 0.277825 1.0 \n", + "5 0.401314 0.089080 0.024027 0.065767 1.0 \n", "\n", - " Review_Date Author_Name \\\n", - "10 on 06/15/02 00:00 AM (PDT) mike6382 \n", - "11 on 06/15/02 00:00 AM (PDT) mike6382 \n", - "12 on 06/15/02 00:00 AM (PDT) mike6382 \n", - "13 on 06/15/02 00:00 AM (PDT) mike6382 \n", - "14 on 06/15/02 00:00 AM (PDT) mike6382 \n", - "15 on 06/15/02 00:00 AM (PDT) mike6382 \n", - "16 on 06/15/02 00:00 AM (PDT) mike6382 \n", - "17 on 06/15/02 00:00 AM (PDT) mike6382 \n", + " Review_Date Author_Name \\\n", + "0 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "1 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "2 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "3 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "4 on 06/15/02 00:00 AM (PDT) mike6382 \n", + "5 on 06/15/02 00:00 AM (PDT) mike6382 \n", "\n", - " Vehicle_Title Review_Title \\\n", - "10 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", - "11 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", - "12 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", - "13 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", - "14 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", - "15 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", - "16 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", - "17 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + " Vehicle_Title Review_Title \\\n", + "0 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + "1 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + "2 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + "3 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + "4 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", + 
"5 2000 AM General Hummer SUV Hard Top 4dr SUV AWD What a waste \n", "\n", - " Review Rating\\r Car_Make \\\n", - "10 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", - "11 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", - "12 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", - "13 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", - "14 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", - "15 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", - "16 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", - "17 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + " Review Rating\\r Car_Make \\\n", + "0 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + "1 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + "2 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + "3 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + "4 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", + "5 I have owned this car for a year and a \\rhalf... 1.0 AMGeneral \n", "\n", - " Review_Content \n", - "10 What a waste: I have owned this car for a year... \n", - "11 What a waste: I have owned this car for a year... \n", - "12 What a waste: I have owned this car for a year... \n", - "13 What a waste: I have owned this car for a year... \n", - "14 What a waste: I have owned this car for a year... \n", - "15 What a waste: I have owned this car for a year... \n", - "16 What a waste: I have owned this car for a year... \n", - "17 What a waste: I have owned this car for a year... " + " Review_Content \n", + "0 What a waste: I have owned this car for a year... \n", + "1 What a waste: I have owned this car for a year... \n", + "2 What a waste: I have owned this car for a year... \n", + "3 What a waste: I have owned this car for a year... 
\n", + "4 What a waste: I have owned this car for a year... \n", + "5 What a waste: I have owned this car for a year... " ] }, "execution_count": 24, @@ -3274,14 +2912,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As we mentioned above, Watson NLU assigns the sentiment to the keywords based on their context within the sentence. Hence, all keywords within one sentence get the same sentiment score. Thus, to get the aggregated sentiment of each review we calulate the mean sentiment score of its sentences by considering the sentiment assigned to one keyword in each sentence. More specifically, we first drop duplicate sentiment scores for each review and then we calculate the average sentiment and emotion score for each review:" + "As we mentioned above, Watson NLU assigns the sentiment to the keywords based on their context within the sentence. Hence, all keywords within one sentence get the same sentiment score. Thus, to get the aggregated sentiment of each review we calculate the mean sentiment score of its sentences by considering the sentiment assigned to one keyword in each sentence. 
More specifically, we first drop duplicate sentiment scores for each review and then we calculate the average sentiment and emotion score for each review:" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { - "scrolled": true + "tags": [] }, "outputs": [ { @@ -3305,12 +2943,11 @@ " \n", " \n", " \n", - " emotion.anger\n", - " emotion.disgust\n", - " emotion.fear\n", - " emotion.joy\n", " emotion.sadness\n", - " relevance\n", + " emotion.joy\n", + " emotion.fear\n", + " emotion.disgust\n", + " emotion.anger\n", " sentiment.score\n", " Rating\\r\n", " \n", @@ -3323,372 +2960,328 @@ " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", - " 2010 M 35 Acceleration/Braking Defects\n", - " 0.114714\n", - " 0.072652\n", - " 0.143729\n", - " 0.053823\n", - " 0.339398\n", - " 0.647531\n", - " -0.496327\n", - " 2.375\n", - " \n", - " \n", - " A Dream\n", - " 0.172335\n", - " 0.059397\n", - " 0.092631\n", - " 0.076986\n", - " 0.145674\n", - " 0.732942\n", - " -0.440443\n", + " 1 sweet R32\n", + " 0.151543\n", + " 0.532162\n", + " 0.067859\n", + " 0.018501\n", + " 0.112994\n", + " 0.649825\n", " 4.875\n", " \n", " \n", - " A Wonderful Ownership Experiance\n", - " 0.117943\n", - " 0.016000\n", - " 0.101099\n", - " 0.243949\n", - " 0.113873\n", - " 0.646053\n", - " 0.245085\n", - " 4.750\n", + " 2002 Trans Am/Sunset Orange Metallic\n", + " 0.176322\n", + " 0.465210\n", + " 0.257064\n", + " 0.032842\n", + " 0.038908\n", + " 0.148035\n", + " 4.625\n", " \n", " \n", - " Best truck ever\n", - " 0.042779\n", - " 0.014326\n", - " 0.029521\n", - " 0.277188\n", - " 0.179243\n", - " 0.998194\n", - " 0.969981\n", - " 5.000\n", + " 42 days of driving 8 days in the shop\n", + " 0.206478\n", + " 0.563466\n", + " 0.114506\n", + " 0.010082\n", + " 0.082325\n", + " -0.054126\n", + " 3.375\n", " \n", " \n", - " Even better than the Chevy\n", - " 0.044507\n", - " 0.045185\n", - " 0.054851\n", - " 0.284697\n", - " 0.098830\n", - " 0.664315\n", - " -0.075949\n", - " 5.000\n", + " A 
great little car\n", + " 0.278575\n", + " 0.470586\n", + " 0.063823\n", + " 0.015218\n", + " 0.039688\n", + " 0.503785\n", + " 4.875\n", " \n", " \n", - " I LOVE MY SLK\n", - " 0.070950\n", - " 0.036633\n", - " 0.064407\n", - " 0.694037\n", - " 0.104404\n", - " 0.645773\n", - " 0.926009\n", + " AWESOME FUN MY LITTLE TIGER\n", + " 0.007629\n", + " 0.628312\n", + " 0.013015\n", + " 0.001452\n", + " 0.024782\n", + " 0.986029\n", " 5.000\n", " \n", " \n", - " I love my Caliber\n", - " 0.057248\n", - " 0.040379\n", - " 0.049315\n", - " 0.604041\n", - " 0.187137\n", - " 0.639690\n", - " 0.919963\n", + " I LOVE my Focus\n", + " 0.074019\n", + " 0.589196\n", + " 0.111722\n", + " 0.008124\n", + " 0.066092\n", + " 0.621983\n", " 4.750\n", " \n", " \n", " Looks Good But Hunk Of Junk\n", - " 0.158059\n", - " 0.097350\n", - " 0.173938\n", - " 0.283959\n", - " 0.441097\n", - " 0.819306\n", - " -0.999568\n", + " 0.144671\n", + " 0.061358\n", + " 0.060613\n", + " 0.050494\n", + " 0.116835\n", + " -0.984622\n", " 2.875\n", " \n", " \n", - " Small Reliable Gas Saver!\n", - " 0.106193\n", - " 0.034896\n", - " 0.076201\n", - " 0.215213\n", - " 0.193688\n", - " 0.620210\n", - " 0.069819\n", - " 4.500\n", + " Mr TACOMA\n", + " 0.122766\n", + " 0.825653\n", + " 0.034777\n", + " 0.023124\n", + " 0.030344\n", + " 0.633803\n", + " 5.000\n", " \n", " \n", " Veracruz\n", - " 0.077888\n", - " 0.017731\n", - " 0.054831\n", - " 0.660735\n", - " 0.153267\n", - " 0.797108\n", - " 0.591815\n", + " 0.106981\n", + " 0.524371\n", + " 0.091482\n", + " 0.012344\n", + " 0.054493\n", + " 0.591816\n", " 4.750\n", " \n", " \n", - " hfgh\n", - " 0.018069\n", - " 0.007672\n", - " 0.040851\n", - " 0.866982\n", - " 0.023953\n", - " 0.860490\n", - " 0.971604\n", - " 5.000\n", + " You will pay for that warranty\n", + " 0.396306\n", + " 0.110458\n", + " 0.056980\n", + " 0.021192\n", + " 0.119030\n", + " -0.373583\n", + " 2.750\n", " \n", " \n", - " i'm on my second one\n", - " 0.367577\n", - " 0.156578\n", - " 
0.418364\n", - " 0.043710\n", - " 0.366614\n", - " 0.790430\n", - " -0.825715\n", - " 5.000\n", + " everyday rSx\n", + " 0.038486\n", + " 0.515852\n", + " 0.133419\n", + " 0.008035\n", + " 0.033998\n", + " 0.677286\n", + " 4.000\n", " \n", " \n", - " \"Acceleration failure\" - Genesis phraseology\n", - " 0.141623\n", - " 0.103167\n", - " 0.200245\n", - " 0.063420\n", - " 0.281382\n", - " 0.626660\n", - " -0.643850\n", - " 3.000\n", + " got new weel\n", + " 0.108507\n", + " 0.348390\n", + " 0.079194\n", + " 0.034643\n", + " 0.239177\n", + " 0.654034\n", + " 4.625\n", " \n", " \n", - " \"FUN\"\n", - " 0.079389\n", - " 0.033952\n", - " 0.083648\n", - " 0.535024\n", - " 0.236222\n", - " 0.629725\n", - " 0.086591\n", + " i'm on my second one\n", + " 0.063124\n", + " 0.024840\n", + " 0.053951\n", + " 0.026402\n", + " 0.165089\n", + " -0.973446\n", " 5.000\n", " \n", " \n", - " \"First Ride\" Impressions when I visited Tesla's Factory\n", - " 0.089593\n", - " 0.035031\n", - " 0.061062\n", - " 0.353900\n", - " 0.148852\n", - " 0.623073\n", - " 0.664976\n", - " 4.875\n", + " ! 
un happy Camper\n", + " 0.424926\n", + " 0.219506\n", + " 0.066627\n", + " 0.036578\n", + " 0.084982\n", + " -0.388182\n", + " 2.625\n", " \n", " \n", - " \"Free\" Green on green lightning\n", - " 0.099639\n", - " 0.032288\n", - " 0.087812\n", - " 0.351336\n", - " 0.215824\n", - " 0.582427\n", - " 0.236840\n", - " 5.000\n", + " \"\"\"I can't believe it \"\"\n", + " 0.244671\n", + " 0.025868\n", + " 0.053133\n", + " 0.067597\n", + " 0.165597\n", + " -0.904022\n", + " 1.000\n", " \n", " \n", - " \"Grin\"\n", - " 0.147565\n", - " 0.086040\n", - " 0.092980\n", - " 0.440214\n", - " 0.043312\n", - " 0.669190\n", - " 0.665514\n", + " \"06\" GTO\n", + " 0.102005\n", + " 0.632400\n", + " 0.113796\n", + " 0.055768\n", + " 0.067348\n", + " 0.759998\n", " 5.000\n", " \n", " \n", - " \"Hemi-ness Is A Warm Run\"\n", - " 0.055350\n", - " 0.025375\n", - " 0.089810\n", - " 0.528417\n", - " 0.072373\n", - " 0.617704\n", - " 0.727694\n", - " 4.750\n", + " \"Acceleration failure\" - Genesis phraseology\n", + " 0.139895\n", + " 0.143780\n", + " 0.259497\n", + " 0.049344\n", + " 0.086211\n", + " -0.632639\n", + " 3.000\n", " \n", " \n", - " \"It's Still in the Shop\"\n", - " 0.213748\n", - " 0.064518\n", - " 0.144874\n", - " 0.147013\n", - " 0.540048\n", - " 0.604038\n", - " -0.716944\n", - " 1.500\n", - " \n", - " \n", - " \"Jack-of-all-trades\" yet master of many\n", - " 0.068786\n", - " 0.030514\n", - " 0.149626\n", - " 0.283489\n", - " 0.241629\n", - " 0.661193\n", - " 0.077671\n", - " 4.375\n", + " \"Cry wolf\" tire light and redundant warning screen\n", + " 0.304694\n", + " 0.165782\n", + " 0.128762\n", + " 0.043923\n", + " 0.172683\n", + " -0.744237\n", + " 3.000\n", + " \n", + " \n", + " \"Downgraded\" to an LS 430 but best upgrade ever!\n", + " 0.381772\n", + " 0.401842\n", + " 0.028101\n", + " 0.018007\n", + " 0.026035\n", + " 0.480924\n", + " 5.000\n", + " \n", + " \n", + " \"First Ride\" Impressions when I visited Tesla's Factory\n", + " 0.231870\n", + " 0.443138\n", + " 
0.036262\n", + " 0.023640\n", + " 0.051412\n", + " 0.610619\n", + " 4.875\n", " \n", " \n", "\n", "" ], "text/plain": [ - " emotion.anger \\\n", - "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 0.114714 \n", - " A Dream 0.172335 \n", - " A Wonderful Ownership Experiance 0.117943 \n", - " Best truck ever 0.042779 \n", - " Even better than the Chevy 0.044507 \n", - " I LOVE MY SLK 0.070950 \n", - " I love my Caliber 0.057248 \n", - " Looks Good But Hunk Of Junk 0.158059 \n", - " Small Reliable Gas Saver! 0.106193 \n", - " Veracruz 0.077888 \n", - " hfgh 0.018069 \n", - " i'm on my second one 0.367577 \n", - "\"Acceleration failure\" - Genesis phraseology 0.141623 \n", - "\"FUN\" 0.079389 \n", - "\"First Ride\" Impressions when I visited Tesla's... 0.089593 \n", - "\"Free\" Green on green lightning 0.099639 \n", - "\"Grin\" 0.147565 \n", - "\"Hemi-ness Is A Warm Run\" 0.055350 \n", - "\"It's Still in the Shop\" 0.213748 \n", - "\"Jack-of-all-trades\" yet master of many 0.068786 \n", - "\n", - " emotion.disgust \\\n", + " emotion.sadness \\\n", "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 0.072652 \n", - " A Dream 0.059397 \n", - " A Wonderful Ownership Experiance 0.016000 \n", - " Best truck ever 0.014326 \n", - " Even better than the Chevy 0.045185 \n", - " I LOVE MY SLK 0.036633 \n", - " I love my Caliber 0.040379 \n", - " Looks Good But Hunk Of Junk 0.097350 \n", - " Small Reliable Gas Saver! 0.034896 \n", - " Veracruz 0.017731 \n", - " hfgh 0.007672 \n", - " i'm on my second one 0.156578 \n", - "\"Acceleration failure\" - Genesis phraseology 0.103167 \n", - "\"FUN\" 0.033952 \n", - "\"First Ride\" Impressions when I visited Tesla's... 
0.035031 \n", - "\"Free\" Green on green lightning 0.032288 \n", - "\"Grin\" 0.086040 \n", - "\"Hemi-ness Is A Warm Run\" 0.025375 \n", - "\"It's Still in the Shop\" 0.064518 \n", - "\"Jack-of-all-trades\" yet master of many 0.030514 \n", + " 1 sweet R32 0.151543 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.176322 \n", + " 42 days of driving 8 days in the shop 0.206478 \n", + " A great little car 0.278575 \n", + " AWESOME FUN MY LITTLE TIGER 0.007629 \n", + " I LOVE my Focus 0.074019 \n", + " Looks Good But Hunk Of Junk 0.144671 \n", + " Mr TACOMA 0.122766 \n", + " Veracruz 0.106981 \n", + " You will pay for that warranty 0.396306 \n", + " everyday rSx 0.038486 \n", + " got new weel 0.108507 \n", + " i'm on my second one 0.063124 \n", + "! un happy Camper 0.424926 \n", + "\"\"\"I can't believe it \"\" 0.244671 \n", + "\"06\" GTO 0.102005 \n", + "\"Acceleration failure\" - Genesis phraseology 0.139895 \n", + "\"Cry wolf\" tire light and redundant warning screen 0.304694 \n", + "\"Downgraded\" to an LS 430 but best upgrade ever! 0.381772 \n", + "\"First Ride\" Impressions when I visited Tesla's... 0.231870 \n", "\n", - " emotion.fear emotion.joy \\\n", + " emotion.joy emotion.fear \\\n", "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 0.143729 0.053823 \n", - " A Dream 0.092631 0.076986 \n", - " A Wonderful Ownership Experiance 0.101099 0.243949 \n", - " Best truck ever 0.029521 0.277188 \n", - " Even better than the Chevy 0.054851 0.284697 \n", - " I LOVE MY SLK 0.064407 0.694037 \n", - " I love my Caliber 0.049315 0.604041 \n", - " Looks Good But Hunk Of Junk 0.173938 0.283959 \n", - " Small Reliable Gas Saver! 0.076201 0.215213 \n", - " Veracruz 0.054831 0.660735 \n", - " hfgh 0.040851 0.866982 \n", - " i'm on my second one 0.418364 0.043710 \n", - "\"Acceleration failure\" - Genesis phraseology 0.200245 0.063420 \n", - "\"FUN\" 0.083648 0.535024 \n", - "\"First Ride\" Impressions when I visited Tesla's... 
0.061062 0.353900 \n", - "\"Free\" Green on green lightning 0.087812 0.351336 \n", - "\"Grin\" 0.092980 0.440214 \n", - "\"Hemi-ness Is A Warm Run\" 0.089810 0.528417 \n", - "\"It's Still in the Shop\" 0.144874 0.147013 \n", - "\"Jack-of-all-trades\" yet master of many 0.149626 0.283489 \n", + " 1 sweet R32 0.532162 0.067859 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.465210 0.257064 \n", + " 42 days of driving 8 days in the shop 0.563466 0.114506 \n", + " A great little car 0.470586 0.063823 \n", + " AWESOME FUN MY LITTLE TIGER 0.628312 0.013015 \n", + " I LOVE my Focus 0.589196 0.111722 \n", + " Looks Good But Hunk Of Junk 0.061358 0.060613 \n", + " Mr TACOMA 0.825653 0.034777 \n", + " Veracruz 0.524371 0.091482 \n", + " You will pay for that warranty 0.110458 0.056980 \n", + " everyday rSx 0.515852 0.133419 \n", + " got new weel 0.348390 0.079194 \n", + " i'm on my second one 0.024840 0.053951 \n", + "! un happy Camper 0.219506 0.066627 \n", + "\"\"\"I can't believe it \"\" 0.025868 0.053133 \n", + "\"06\" GTO 0.632400 0.113796 \n", + "\"Acceleration failure\" - Genesis phraseology 0.143780 0.259497 \n", + "\"Cry wolf\" tire light and redundant warning screen 0.165782 0.128762 \n", + "\"Downgraded\" to an LS 430 but best upgrade ever! 0.401842 0.028101 \n", + "\"First Ride\" Impressions when I visited Tesla's... 0.443138 0.036262 \n", "\n", - " emotion.sadness \\\n", + " emotion.disgust \\\n", "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 0.339398 \n", - " A Dream 0.145674 \n", - " A Wonderful Ownership Experiance 0.113873 \n", - " Best truck ever 0.179243 \n", - " Even better than the Chevy 0.098830 \n", - " I LOVE MY SLK 0.104404 \n", - " I love my Caliber 0.187137 \n", - " Looks Good But Hunk Of Junk 0.441097 \n", - " Small Reliable Gas Saver! 
0.193688 \n", - " Veracruz 0.153267 \n", - " hfgh 0.023953 \n", - " i'm on my second one 0.366614 \n", - "\"Acceleration failure\" - Genesis phraseology 0.281382 \n", - "\"FUN\" 0.236222 \n", - "\"First Ride\" Impressions when I visited Tesla's... 0.148852 \n", - "\"Free\" Green on green lightning 0.215824 \n", - "\"Grin\" 0.043312 \n", - "\"Hemi-ness Is A Warm Run\" 0.072373 \n", - "\"It's Still in the Shop\" 0.540048 \n", - "\"Jack-of-all-trades\" yet master of many 0.241629 \n", + " 1 sweet R32 0.018501 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.032842 \n", + " 42 days of driving 8 days in the shop 0.010082 \n", + " A great little car 0.015218 \n", + " AWESOME FUN MY LITTLE TIGER 0.001452 \n", + " I LOVE my Focus 0.008124 \n", + " Looks Good But Hunk Of Junk 0.050494 \n", + " Mr TACOMA 0.023124 \n", + " Veracruz 0.012344 \n", + " You will pay for that warranty 0.021192 \n", + " everyday rSx 0.008035 \n", + " got new weel 0.034643 \n", + " i'm on my second one 0.026402 \n", + "! un happy Camper 0.036578 \n", + "\"\"\"I can't believe it \"\" 0.067597 \n", + "\"06\" GTO 0.055768 \n", + "\"Acceleration failure\" - Genesis phraseology 0.049344 \n", + "\"Cry wolf\" tire light and redundant warning screen 0.043923 \n", + "\"Downgraded\" to an LS 430 but best upgrade ever! 0.018007 \n", + "\"First Ride\" Impressions when I visited Tesla's... 0.023640 \n", "\n", - " relevance \\\n", - "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 0.647531 \n", - " A Dream 0.732942 \n", - " A Wonderful Ownership Experiance 0.646053 \n", - " Best truck ever 0.998194 \n", - " Even better than the Chevy 0.664315 \n", - " I LOVE MY SLK 0.645773 \n", - " I love my Caliber 0.639690 \n", - " Looks Good But Hunk Of Junk 0.819306 \n", - " Small Reliable Gas Saver! 
0.620210 \n", - " Veracruz 0.797108 \n", - " hfgh 0.860490 \n", - " i'm on my second one 0.790430 \n", - "\"Acceleration failure\" - Genesis phraseology 0.626660 \n", - "\"FUN\" 0.629725 \n", - "\"First Ride\" Impressions when I visited Tesla's... 0.623073 \n", - "\"Free\" Green on green lightning 0.582427 \n", - "\"Grin\" 0.669190 \n", - "\"Hemi-ness Is A Warm Run\" 0.617704 \n", - "\"It's Still in the Shop\" 0.604038 \n", - "\"Jack-of-all-trades\" yet master of many 0.661193 \n", + " emotion.anger \\\n", + "Review_Title \n", + " 1 sweet R32 0.112994 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.038908 \n", + " 42 days of driving 8 days in the shop 0.082325 \n", + " A great little car 0.039688 \n", + " AWESOME FUN MY LITTLE TIGER 0.024782 \n", + " I LOVE my Focus 0.066092 \n", + " Looks Good But Hunk Of Junk 0.116835 \n", + " Mr TACOMA 0.030344 \n", + " Veracruz 0.054493 \n", + " You will pay for that warranty 0.119030 \n", + " everyday rSx 0.033998 \n", + " got new weel 0.239177 \n", + " i'm on my second one 0.165089 \n", + "! un happy Camper 0.084982 \n", + "\"\"\"I can't believe it \"\" 0.165597 \n", + "\"06\" GTO 0.067348 \n", + "\"Acceleration failure\" - Genesis phraseology 0.086211 \n", + "\"Cry wolf\" tire light and redundant warning screen 0.172683 \n", + "\"Downgraded\" to an LS 430 but best upgrade ever! 0.026035 \n", + "\"First Ride\" Impressions when I visited Tesla's... 0.051412 \n", "\n", " sentiment.score Rating\\r \n", "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects -0.496327 2.375 \n", - " A Dream -0.440443 4.875 \n", - " A Wonderful Ownership Experiance 0.245085 4.750 \n", - " Best truck ever 0.969981 5.000 \n", - " Even better than the Chevy -0.075949 5.000 \n", - " I LOVE MY SLK 0.926009 5.000 \n", - " I love my Caliber 0.919963 4.750 \n", - " Looks Good But Hunk Of Junk -0.999568 2.875 \n", - " Small Reliable Gas Saver! 
0.069819 4.500 \n", - " Veracruz 0.591815 4.750 \n", - " hfgh 0.971604 5.000 \n", - " i'm on my second one -0.825715 5.000 \n", - "\"Acceleration failure\" - Genesis phraseology -0.643850 3.000 \n", - "\"FUN\" 0.086591 5.000 \n", - "\"First Ride\" Impressions when I visited Tesla's... 0.664976 4.875 \n", - "\"Free\" Green on green lightning 0.236840 5.000 \n", - "\"Grin\" 0.665514 5.000 \n", - "\"Hemi-ness Is A Warm Run\" 0.727694 4.750 \n", - "\"It's Still in the Shop\" -0.716944 1.500 \n", - "\"Jack-of-all-trades\" yet master of many 0.077671 4.375 " + " 1 sweet R32 0.649825 4.875 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.148035 4.625 \n", + " 42 days of driving 8 days in the shop -0.054126 3.375 \n", + " A great little car 0.503785 4.875 \n", + " AWESOME FUN MY LITTLE TIGER 0.986029 5.000 \n", + " I LOVE my Focus 0.621983 4.750 \n", + " Looks Good But Hunk Of Junk -0.984622 2.875 \n", + " Mr TACOMA 0.633803 5.000 \n", + " Veracruz 0.591816 4.750 \n", + " You will pay for that warranty -0.373583 2.750 \n", + " everyday rSx 0.677286 4.000 \n", + " got new weel 0.654034 4.625 \n", + " i'm on my second one -0.973446 5.000 \n", + "! un happy Camper -0.388182 2.625 \n", + "\"\"\"I can't believe it \"\" -0.904022 1.000 \n", + "\"06\" GTO 0.759998 5.000 \n", + "\"Acceleration failure\" - Genesis phraseology -0.632639 3.000 \n", + "\"Cry wolf\" tire light and redundant warning screen -0.744237 3.000 \n", + "\"Downgraded\" to an LS 430 but best upgrade ever! 0.480924 5.000 \n", + "\"First Ride\" Impressions when I visited Tesla's... 
0.610619 4.875 " ] }, "execution_count": 25, @@ -3697,7 +3290,13 @@ } ], "source": [ - "agg_merged_keywords_review_df = merged_keywords_review_df.drop(['count'], axis=1).drop_duplicates(['Review_Title','sentiment.score']).groupby('Review_Title').mean()\n", + "sentiment_cols = [str(c) for c in merged_keywords_review_df.columns\n", + " if c.startswith('emotion.')] + ['sentiment.score']\n", + "agg_merged_keywords_review_df = (\n", + " merged_keywords_review_df[sentiment_cols + ['Review_Title', 'Rating\\r']]\n", + " .drop_duplicates(['Review_Title','sentiment.score'])\n", + " .groupby('Review_Title')\n", + " .mean())\n", "agg_merged_keywords_review_df.head(20)" ] }, @@ -3717,273 +3316,234 @@ "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 emotion.angeremotion.disgustemotion.fearemotion.joyemotion.sadnessrelevancesentiment.scoreRating\r", + " emotion.sadnessemotion.joyemotion.fearemotion.disgustemotion.angersentiment.scoreRating\r", "
emotion.anger1.000.460.43-0.520.46-0.13-0.54-0.39
emotion.disgust0.461.000.30-0.450.38-0.16-0.43-0.32
emotion.fear0.430.301.00-0.520.46-0.13-0.48-0.31
emotion.joy-0.52-0.45-0.521.00-0.640.240.710.46
emotion.sadness0.460.380.46-0.641.00-0.16-0.66-0.47
relevance-0.13-0.16-0.130.24-0.161.000.170.10
sentiment.score-0.54-0.43-0.480.71-0.660.171.000.61
Rating\r", + " emotion.sadness1.000000-0.6354960.0990180.0620680.158305-0.519823-0.353612
emotion.joy-0.6354961.000000-0.391679-0.226616-0.4847400.7612110.518204
emotion.fear0.099018-0.3916791.0000000.0778240.149845-0.321152-0.187657
emotion.disgust0.062068-0.2266160.0778241.0000000.136611-0.213541-0.155754
emotion.anger0.158305-0.4847400.1498450.1366111.000000-0.440811-0.352083
sentiment.score-0.5198230.761211-0.321152-0.213541-0.4408111.0000000.620320
Rating\r", "-0.39-0.32-0.310.46-0.470.100.611.00-0.3536120.518204-0.187657-0.155754-0.3520830.6203201.000000
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 26, @@ -3993,7 +3553,7 @@ ], "source": [ "corr = agg_merged_keywords_review_df.corr(method ='pearson')\n", - "corr.style.background_gradient(cmap='coolwarm').set_precision(2)" + "corr.style.background_gradient(cmap='coolwarm')" ] }, { @@ -4059,6 +3619,9 @@ "outputs": [ { "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], "text/plain": [ "LinearRegression()" ] @@ -4105,8 +3668,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Mean Squared Error = 0.5905008481781794\n", - "R-Squared = 0.3859872132354025\n" + "Mean Squared Error = 0.6260140152618712\n", + "R-Squared = 0.37732446794693686\n" ] } ], @@ -4164,12 +3727,11 @@ " \n", " \n", " \n", - " emotion.anger\n", - " emotion.disgust\n", - " emotion.fear\n", - " emotion.joy\n", " emotion.sadness\n", - " relevance\n", + " emotion.joy\n", + " emotion.fear\n", + " emotion.disgust\n", + " emotion.anger\n", " sentiment.score\n", " \n", " \n", @@ -4180,96 +3742,82 @@ " \n", " \n", " \n", - " \n", " \n", " \n", " \n", " \n", - " 2010 M 35 Acceleration/Braking Defects\n", - " 0.114714\n", - " 0.072652\n", - " 0.143729\n", - " 0.053823\n", - " 0.339398\n", - " 0.647531\n", - " -0.496327\n", - " \n", - " \n", - " A Dream\n", - " 0.172335\n", - " 0.059397\n", - " 0.092631\n", - " 0.076986\n", - " 0.145674\n", - " 0.732942\n", - " -0.440443\n", - " \n", - " \n", - " A Wonderful Ownership Experiance\n", - " 0.117943\n", - " 0.016000\n", - " 0.101099\n", - " 0.243949\n", - " 0.113873\n", - " 0.646053\n", - " 0.245085\n", - " \n", - " \n", - " Best truck ever\n", - " 0.042779\n", - " 0.014326\n", - " 0.029521\n", - " 0.277188\n", - " 0.179243\n", - " 0.998194\n", - " 0.969981\n", - " \n", - " \n", - " Even better than the Chevy\n", - " 0.044507\n", - " 0.045185\n", - " 0.054851\n", - " 0.284697\n", - " 0.098830\n", - " 0.664315\n", - " -0.075949\n", + " 1 sweet R32\n", + " 0.151543\n", + " 0.532162\n", + " 0.067859\n", + " 0.018501\n", + " 0.112994\n", + " 0.649825\n", + " \n", + " \n", + " 2002 Trans Am/Sunset Orange Metallic\n", + " 0.176322\n", + " 0.465210\n", + " 0.257064\n", + " 0.032842\n", + " 0.038908\n", + " 0.148035\n", + " \n", + " \n", + " 42 days of driving 8 days in the shop\n", + " 0.206478\n", + " 0.563466\n", + " 0.114506\n", + " 0.010082\n", + " 0.082325\n", + " -0.054126\n", + " \n", + " \n", + " A great little 
car\n", + " 0.278575\n", + " 0.470586\n", + " 0.063823\n", + " 0.015218\n", + " 0.039688\n", + " 0.503785\n", + " \n", + " \n", + " AWESOME FUN MY LITTLE TIGER\n", + " 0.007629\n", + " 0.628312\n", + " 0.013015\n", + " 0.001452\n", + " 0.024782\n", + " 0.986029\n", " \n", " \n", "\n", "" ], "text/plain": [ - " emotion.anger emotion.disgust \\\n", - "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 0.114714 0.072652 \n", - " A Dream 0.172335 0.059397 \n", - " A Wonderful Ownership Experiance 0.117943 0.016000 \n", - " Best truck ever 0.042779 0.014326 \n", - " Even better than the Chevy 0.044507 0.045185 \n", + " emotion.sadness emotion.joy \\\n", + "Review_Title \n", + " 1 sweet R32 0.151543 0.532162 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.176322 0.465210 \n", + " 42 days of driving 8 days in the shop 0.206478 0.563466 \n", + " A great little car 0.278575 0.470586 \n", + " AWESOME FUN MY LITTLE TIGER 0.007629 0.628312 \n", "\n", - " emotion.fear emotion.joy \\\n", - "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 0.143729 0.053823 \n", - " A Dream 0.092631 0.076986 \n", - " A Wonderful Ownership Experiance 0.101099 0.243949 \n", - " Best truck ever 0.029521 0.277188 \n", - " Even better than the Chevy 0.054851 0.284697 \n", - "\n", - " emotion.sadness relevance \\\n", - "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 0.339398 0.647531 \n", - " A Dream 0.145674 0.732942 \n", - " A Wonderful Ownership Experiance 0.113873 0.646053 \n", - " Best truck ever 0.179243 0.998194 \n", - " Even better than the Chevy 0.098830 0.664315 \n", + " emotion.fear emotion.disgust \\\n", + "Review_Title \n", + " 1 sweet R32 0.067859 0.018501 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.257064 0.032842 \n", + " 42 days of driving 8 days in the shop 0.114506 0.010082 \n", + " A great little car 0.063823 0.015218 \n", + " AWESOME FUN MY LITTLE TIGER 0.013015 0.001452 \n", "\n", - " sentiment.score \n", - "Review_Title \n", - " 2010 M 35 
Acceleration/Braking Defects -0.496327 \n", - " A Dream -0.440443 \n", - " A Wonderful Ownership Experiance 0.245085 \n", - " Best truck ever 0.969981 \n", - " Even better than the Chevy -0.075949 " + " emotion.anger sentiment.score \n", + "Review_Title \n", + " 1 sweet R32 0.112994 0.649825 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.038908 0.148035 \n", + " 42 days of driving 8 days in the shop 0.082325 -0.054126 \n", + " A great little car 0.039688 0.503785 \n", + " AWESOME FUN MY LITTLE TIGER 0.024782 0.986029 " ] }, "execution_count": 32, @@ -4278,7 +3826,7 @@ } ], "source": [ - "X_df = agg_merged_keywords_review_df.dropna().iloc[:, :7]\n", + "X_df = agg_merged_keywords_review_df.drop(columns='Rating\\r').dropna().iloc[:, :7]\n", "X_df.head()" ] }, @@ -4290,7 +3838,7 @@ }, "outputs": [], "source": [ - "X = X_df.values.reshape(-1, 7) # values converts it into a numpy array\n", + "X = X_df.values.reshape(-1, 6) # values converts it into a numpy array\n", "Y = agg_merged_keywords_review_df.dropna()['Rating\\r'].values.reshape(-1, 1) # -1 means that calculate the dimension of rows, but have 1 column" ] }, @@ -4310,6 +3858,9 @@ "outputs": [ { "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], "text/plain": [ "LinearRegression()" ] @@ -4342,8 +3893,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Mean Squared Error = 0.5800027622737277\n", - "R-Squared = 0.39690330082744185\n" + "Mean Squared Error = 0.6149240275777909\n", + "R-Squared = 0.3883553136042148\n" ] } ], @@ -4377,14 +3928,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Feature Coefficients = [[-0.81706333 -0.75366695 0.18783079 0.00182372 -0.8290045 -0.07412579\n", - " 0.95410364]]\n" + "Feature Coefficients = [[-0.22073291 0.32887635 0.37191993 -0.32082964 -1.60607343 1.01721109]]\n" ] }, { "data": { "text/plain": [ - "array([4.44054646])" + "array([4.0674602])" ] }, "execution_count": 38, @@ -4439,28 +3989,28 @@ " \n", " \n", " 0\n", - " 5.235334\n", - " 2.375000\n", + " 4.300783\n", + " 4.875\n", " \n", " \n", " 1\n", - " 4.735796\n", - " 4.875000\n", + " 3.895731\n", + " 4.625\n", " \n", " \n", " 2\n", - " 4.567478\n", - " 4.750000\n", + " 4.342326\n", + " 3.375\n", " \n", " \n", " 3\n", - " 4.404560\n", - " 5.000000\n", + " 4.705346\n", + " 4.875\n", " \n", " \n", " 4\n", - " 5.188259\n", - " 5.000000\n", + " 3.256459\n", + " 5.000\n", " \n", " \n", " ...\n", @@ -4468,50 +4018,50 @@ " ...\n", " \n", " \n", - " 1524\n", - " 4.547005\n", - " 5.000000\n", + " 1527\n", + " 4.623339\n", + " 3.125\n", " \n", " \n", - " 1525\n", - " 4.129847\n", - " 5.000000\n", + " 1528\n", + " 3.747142\n", + " 5.000\n", " \n", " \n", - " 1526\n", - " 4.019317\n", - " 1.886364\n", + " 1529\n", + " 3.874102\n", + " 4.500\n", " \n", " \n", - " 1527\n", - " 4.591441\n", - " 3.500000\n", + " 1530\n", + " 3.468226\n", + " 3.875\n", " \n", " \n", - " 1528\n", - " 4.195468\n", - " 4.125000\n", + " 1531\n", + " 4.406095\n", + " 5.000\n", " \n", " \n", "\n", - "

1529 rows × 2 columns

\n", + "

1532 rows × 2 columns

\n", "" ], "text/plain": [ " Predicted Rating Actual Rating\n", - "0 5.235334 2.375000\n", - "1 4.735796 4.875000\n", - "2 4.567478 4.750000\n", - "3 4.404560 5.000000\n", - "4 5.188259 5.000000\n", + "0 4.300783 4.875\n", + "1 3.895731 4.625\n", + "2 4.342326 3.375\n", + "3 4.705346 4.875\n", + "4 3.256459 5.000\n", "... ... ...\n", - "1524 4.547005 5.000000\n", - "1525 4.129847 5.000000\n", - "1526 4.019317 1.886364\n", - "1527 4.591441 3.500000\n", - "1528 4.195468 4.125000\n", + "1527 4.623339 3.125\n", + "1528 3.747142 5.000\n", + "1529 3.874102 4.500\n", + "1530 3.468226 3.875\n", + "1531 4.406095 5.000\n", "\n", - "[1529 rows x 2 columns]" + "[1532 rows x 2 columns]" ] }, "execution_count": 39, @@ -4541,19 +4091,17 @@ }, { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], "source": [ - "plt.scatter(Y_test, Y_pred)\n", + "plt.scatter(Y_test, Y_pred, alpha=0.2)\n", "plt.xlabel('Rating From Dataset')\n", "plt.ylabel('Rating Predicted By Model')\n", "plt.rcParams[\"figure.figsize\"] = (10,6) # Custom figure size in inches\n", @@ -4581,6 +4129,9 @@ "outputs": [ { "data": { + "text/html": [ + "
RandomForestRegressor(random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], "text/plain": [ "RandomForestRegressor(random_state=0)" ] @@ -4634,8 +4185,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Mean Squared Error = 0.5311930091091184\n", - "R-Squared = 0.44765650914942323\n" + "Mean Squared Error = 0.572035684052662\n", + "R-Squared = 0.43101493698694837\n" ] } ], @@ -4671,19 +4222,17 @@ }, { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], "source": [ - "plt.scatter(Y_test, Y_pred)\n", + "plt.scatter(Y_test, Y_pred, alpha=0.2)\n", "plt.xlabel('Rating From Dataset')\n", "plt.ylabel('Rating Predicted By Model')\n", "plt.rcParams[\"figure.figsize\"] = (10,6) # Custom figure size in inches\n", @@ -4713,8 +4262,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Mean Squared Error = 0.5024629864248751\n", - "R-Squared = 0.47753047350796296\n" + "Mean Squared Error = 0.5519593529266892\n", + "R-Squared = 0.4509842026276053\n" ] } ], @@ -4751,19 +4300,17 @@ }, { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], "source": [ - "plt.scatter(Y_test, Y_pred)\n", + "plt.scatter(Y_test, Y_pred, alpha=0.2)\n", "plt.xlabel('Rating From Dataset')\n", "plt.ylabel('Rating Predicted By Model')\n", "plt.rcParams[\"figure.figsize\"] = (10,6) # Custom figure size in inches\n", @@ -4835,194 +4382,194 @@ " \n", " \n", " \n", - " 2010 M 35 Acceleration/Braking Defects\n", - " -0.496327\n", - " 0.339398\n", - " 0.053823\n", - " 0.143729\n", - " 0.072652\n", - " 0.114714\n", - " 2.375\n", - " infiniti\n", - " 2010 M 35 Acceleration/Braking Defects: I leased a new 2010 M 35. This was my 9th Nissan/Infinity vehicle since 1996. This vehicle has been a huge disappointment. The vehicle has demonstrated low speed and high speed rpm increases while downs shifting. At high speeds, this is causing additional wear and tear on the drive train as evidenced by a large clanking noise. At low speeds, this is causing excess wear and tear on brakes. In both situations, it poses a safety hazard. Nissan/Infinity claims this is within the normal operating range, but the problem is significant and widespread as evidenced by Nissan/Infinity's own service people and engineers. Infinity will be forced to address this very soon or face a Toyota situation.\n", - " \n", - " \n", - " A Dream\n", - " -0.440443\n", - " 0.145674\n", - " 0.076986\n", - " 0.092631\n", - " 0.059397\n", - " 0.172335\n", + " 1 sweet R32\n", + " 0.649825\n", + " 0.151543\n", + " 0.532162\n", + " 0.067859\n", + " 0.018501\n", + " 0.112994\n", " 4.875\n", - " BMW\n", - " A Dream: Bought this vehicle for myself with 97000 miles and fully loaded with no apparent deficits (one owner vehicle). Body, engine, paint, interior, exterior in immaculate condition. Have driven it on long and short trips and it continues to elicit stares from lexus and benz owners. 
This vehicle is made for driving!!\n", - " \n", - " \n", - " A Wonderful Ownership Experiance\n", - " 0.245085\n", - " 0.113873\n", - " 0.243949\n", - " 0.101099\n", - " 0.016000\n", - " 0.117943\n", - " 4.750\n", - " lincoln\n", - " A Wonderful Ownership Experiance: Purchased the car with 20,000 miles on \\rit. I drive every day and I am not \\reasy on a car. My 98 Town Car \\rSignature has been truly a wonderful \\rcar and most of all I have had not one \\rmajor problem with the car and any \\rvisit to the Lincoln service \\rdepartment has only given me great \\rtreatment.The car is always reliable \\rand built tough, I am approaching \\r110,000 miles on it and I have got the \\rurge to get something new, however \\rthis car has been so good to me I \\rdon't want to trade it for something \\rwith problems. I would recomend this \\rcar for anyone who desires a luxury \\rride and reliability.\n", - " \n", - " \n", - " Best truck ever\n", - " 0.969981\n", - " 0.179243\n", - " 0.277188\n", - " 0.029521\n", - " 0.014326\n", - " 0.042779\n", - " 5.000\n", - " Chevrolet\n", - " Best truck ever: very reliable well rounded truck.\n", + " Volkswagen\n", + " 1 sweet R32: I was looking into buying a Subaru WRX \\rSTI, but after two test drives in each \\rand reading as many \\rRoad&Track,Car&Driver,and any other \\rinfo I could find I desided to go with \\rthe R32. I traded in my 2003 GTI VR6 \\rthat had 29,000 miles on it. That was a \\rgreat car but this is a whole new \\rbeast. Once you own an all wheel drive \\rthere is just no going back. This car \\rhandles like a dream, the seats are the \\rbest I've ever been in. Cabin is put \\rtogether very well and the pipes are \\rcrazy. The climit control is awsome, \\rheated seats are so sweet on those cold \\rwinter days. I live in the central \\rvalley of California so these tire are \\rthe best. 
If there was one thing I \\rwould change(give me a spare tire)!!!!!\n", " \n", " \n", - " Even better than the Chevy\n", - " -0.075949\n", - " 0.098830\n", - " 0.284697\n", - " 0.054851\n", - " 0.045185\n", - " 0.044507\n", - " 5.000\n", - " GMC\n", - " Even better than the Chevy: This is the best truck I have ever \\rowned. This is my first Full Size, but \\rmy dad has a silverado. My GMC drives \\rjust a little bit better,is a little \\rbit quiter, and the exterior styling \\ris the classiest out of all the trucks \\rin the market. Nobody could talk me \\rout of this truck and into anything \\relse. Eventhough the 2004 Ford \\rinterior has a good design, you still \\rcouldn't pay me enough to own a FORD. \\rThe editor that rated the Sierra lost \\rthere mind, it should be alot higher \\rthan the low 8's. The only thing I \\rregret is not getting the Z71 pkg.\n", + " 2002 Trans Am/Sunset Orange Metallic\n", + " 0.148035\n", + " 0.176322\n", + " 0.465210\n", + " 0.257064\n", + " 0.032842\n", + " 0.038908\n", + " 4.625\n", + " pontiac\n", + " 2002 Trans Am/Sunset Orange Metallic: This Is Pontiac's most exciting vehicle \\rof all time.It has so much performance \\rthat it is a big disapointment that it \\rwill be discontinued this year.The only \\rarea that this vehicle does not excell \\rin would be the fuel economy \\rdepartment.I guess that if you can \\rafford one of these dream cars, you \\rreally dony worry about how far it will \\rtravel on a tankfull of gas.\n", + " \n", + " \n", + " 42 days of driving 8 days in the shop\n", + " -0.054126\n", + " 0.206478\n", + " 0.563466\n", + " 0.114506\n", + " 0.010082\n", + " 0.082325\n", + " 3.375\n", + " chrysler\n", + " 42 days of driving 8 days in the shop : I was given the sebring for my 20th wedding anniversary. I have been in love with it for years and finally got it. After 42 days I blew most of the electrical system. It has been at the dealer for 8 days and they can not find the problem. 
Right now I am not very happy.\n", + " \n", + " \n", + " A great little car\n", + " 0.503785\n", + " 0.278575\n", + " 0.470586\n", + " 0.063823\n", + " 0.015218\n", + " 0.039688\n", + " 4.875\n", + " kia\n", + " A great little car: Bought my Spectra about one year ago, currently has about 18,000 miles on it. I have had absolutely no problems with it. I had cruise control added at the time of purchase, other than that it's stock. This is my daily driver, it's comfortable, reliable and gets decent mileage. The Spectra happens to be my second Kia, I have a Sedona van that has been to the dealer several times (however everything was covered by the warranty) it currently has 58,000 miles on it. The Spectra's a great handling car.\n", " \n", " \n", - " I LOVE MY SLK\n", - " 0.926009\n", - " 0.104404\n", - " 0.694037\n", - " 0.064407\n", - " 0.036633\n", - " 0.070950\n", + " AWESOME FUN MY LITTLE TIGER\n", + " 0.986029\n", + " 0.007629\n", + " 0.628312\n", + " 0.013015\n", + " 0.001452\n", + " 0.024782\n", " 5.000\n", - " mercedes-benz\n", - " I LOVE MY SLK: It is fun driving top down or up but I will srive it top down more weather permitting. It get's pretty darned hot here in Phoenix AZ in July.\n", + " fiat\n", + " AWESOME FUN MY LITTLE TIGER: Abarth is ultimately more fun than my old mustang or Z a little power house that doesn't shy away from a fight love the engine growl and the kick more room than you think awesome bang for the buck .Fun the most fun than any car I have ever own worth every penny a pleasure to drive.\n", " \n", " \n", - " I love my Caliber\n", - " 0.919963\n", - " 0.187137\n", - " 0.604041\n", - " 0.049315\n", - " 0.040379\n", - " 0.057248\n", + " I LOVE my Focus\n", + " 0.621983\n", + " 0.074019\n", + " 0.589196\n", + " 0.111722\n", + " 0.008124\n", + " 0.066092\n", " 4.750\n", - " dodge\n", - " I love my Caliber: I just bought my Caliber this week So far it has been a blast to drive. I was so surprised how much pep it's got for a small engine. 
It's also very smooth, you don't even feel the gear shift, I love that. I also love the way it feels like an SUV but looks like a sporty car. So far I love everything about it.\n", + " ford\n", + " I LOVE my Focus: I LOVE my Focus. I've had it about 2 \\ryears. It drives great, looks good, \\rgets great gas milage and never slows \\rdown. I'm even thinking of getting \\ranother one on my next car purchase!\n", " \n", " \n", " Looks Good But Hunk Of Junk\n", - " -0.999568\n", - " 0.441097\n", - " 0.283959\n", - " 0.173938\n", - " 0.097350\n", - " 0.158059\n", + " -0.984622\n", + " 0.144671\n", + " 0.061358\n", + " 0.060613\n", + " 0.050494\n", + " 0.116835\n", " 2.875\n", " maserati\n", " Looks Good But Hunk Of Junk: This car is strictly \"looks only\", it is not reliable or even close to it.I have already sank $13,760 in repairs at only 23K miles.This is totally unacceptable for a $140K car when new.I am taking it to the auction next week to \"unload\" before it can empty my wallet again.But if you want a sharp car that sits good in the driveway - this is it!Just don't drive it anywhere!!\n", " \n", " \n", - " Small Reliable Gas Saver!\n", - " 0.069819\n", - " 0.193688\n", - " 0.215213\n", - " 0.076201\n", - " 0.034896\n", - " 0.106193\n", - " 4.500\n", - " mitsubishi\n", - " Small Reliable Gas Saver!: Purchase car some 166,000 miles ago and have enjoyed its reliability, gas savings, and small car comfort. Yes, it does hesitate on steep hills (only has a 1.5 engine), but it makes the hills. I have traveled from Florida to Nevada, Texas, Oklahoma, Missouri, and now live in Pennsylvania. I make trips to New York and Massachusetts. I do not tire in driving this car. Maintenance has not been a major issue. Yes, I have changed the struts, brakes - but that is to be expected with its mileage. I do recommend consideration of this vehicle for college or young family starting out in life. It has more than paid for its purchase price many, many times. 
Enjoy.\n", + " Mr TACOMA\n", + " 0.633803\n", + " 0.122766\n", + " 0.825653\n", + " 0.034777\n", + " 0.023124\n", + " 0.030344\n", + " 5.000\n", + " Toyota\n", + " Mr TACOMA: Great truck. The Handling is pretty \\rnice and the engine is stronger. The V6 \\rwith 3100 pounds can really make this \\rtruck move.\n", " \n", " \n", " Veracruz\n", - " 0.591815\n", - " 0.153267\n", - " 0.660735\n", - " 0.054831\n", - " 0.017731\n", - " 0.077888\n", + " 0.591816\n", + " 0.106981\n", + " 0.524371\n", + " 0.091482\n", + " 0.012344\n", + " 0.054493\n", " 4.750\n", " hyundai\n", " Veracruz: This is a crossover with the ride of a cruse ship. The car has so many bells and whistles. Have it one week and already over 1100 miles. Finding wonderful things about it every day. Could be the best car ever.\n", " \n", + " \n", + " You will pay for that warranty\n", + " -0.373583\n", + " 0.396306\n", + " 0.110458\n", + " 0.056980\n", + " 0.021192\n", + " 0.119030\n", + " 2.750\n", + " kia\n", + " You will pay for that warranty: Own a 2002 KIA Sedona EX. I complained about lights going dim while under warranty. Kia checked, said everything within parameters. Guess what, 3000 miles out of warranty alternator died. KIA says it's on you now. 63,000 miles and they want $565.00 to repair; that includes alternator, belts and labor. It's not a repair you can do either, seems AC lines are in the way. Do you think KIA planned it? Ask them about changing spark plugs the rear 3, seems you need to remove the air intake manifold? That will require new gaskets? Not sure of that cost. I hope to dump this Sedona by then! Think twice before you buy, they will get you to pay for that supposedly free 5year/60000 bumper to bumper warranty. 
RIPOFF.\n", + " \n", " \n", "\n", "" ], "text/plain": [ - " sentiment.score emotion.sadness \\\n", - "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects -0.496327 0.339398 \n", - " A Dream -0.440443 0.145674 \n", - " A Wonderful Ownership Experiance 0.245085 0.113873 \n", - " Best truck ever 0.969981 0.179243 \n", - " Even better than the Chevy -0.075949 0.098830 \n", - " I LOVE MY SLK 0.926009 0.104404 \n", - " I love my Caliber 0.919963 0.187137 \n", - " Looks Good But Hunk Of Junk -0.999568 0.441097 \n", - " Small Reliable Gas Saver! 0.069819 0.193688 \n", - " Veracruz 0.591815 0.153267 \n", + " sentiment.score emotion.sadness \\\n", + "Review_Title \n", + " 1 sweet R32 0.649825 0.151543 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.148035 0.176322 \n", + " 42 days of driving 8 days in the shop -0.054126 0.206478 \n", + " A great little car 0.503785 0.278575 \n", + " AWESOME FUN MY LITTLE TIGER 0.986029 0.007629 \n", + " I LOVE my Focus 0.621983 0.074019 \n", + " Looks Good But Hunk Of Junk -0.984622 0.144671 \n", + " Mr TACOMA 0.633803 0.122766 \n", + " Veracruz 0.591816 0.106981 \n", + " You will pay for that warranty -0.373583 0.396306 \n", "\n", - " emotion.joy emotion.fear \\\n", - "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 0.053823 0.143729 \n", - " A Dream 0.076986 0.092631 \n", - " A Wonderful Ownership Experiance 0.243949 0.101099 \n", - " Best truck ever 0.277188 0.029521 \n", - " Even better than the Chevy 0.284697 0.054851 \n", - " I LOVE MY SLK 0.694037 0.064407 \n", - " I love my Caliber 0.604041 0.049315 \n", - " Looks Good But Hunk Of Junk 0.283959 0.173938 \n", - " Small Reliable Gas Saver! 
0.215213 0.076201 \n", - " Veracruz 0.660735 0.054831 \n", + " emotion.joy emotion.fear \\\n", + "Review_Title \n", + " 1 sweet R32 0.532162 0.067859 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.465210 0.257064 \n", + " 42 days of driving 8 days in the shop 0.563466 0.114506 \n", + " A great little car 0.470586 0.063823 \n", + " AWESOME FUN MY LITTLE TIGER 0.628312 0.013015 \n", + " I LOVE my Focus 0.589196 0.111722 \n", + " Looks Good But Hunk Of Junk 0.061358 0.060613 \n", + " Mr TACOMA 0.825653 0.034777 \n", + " Veracruz 0.524371 0.091482 \n", + " You will pay for that warranty 0.110458 0.056980 \n", "\n", - " emotion.disgust emotion.anger \\\n", - "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 0.072652 0.114714 \n", - " A Dream 0.059397 0.172335 \n", - " A Wonderful Ownership Experiance 0.016000 0.117943 \n", - " Best truck ever 0.014326 0.042779 \n", - " Even better than the Chevy 0.045185 0.044507 \n", - " I LOVE MY SLK 0.036633 0.070950 \n", - " I love my Caliber 0.040379 0.057248 \n", - " Looks Good But Hunk Of Junk 0.097350 0.158059 \n", - " Small Reliable Gas Saver! 
0.034896 0.106193 \n", - " Veracruz 0.017731 0.077888 \n", + " emotion.disgust emotion.anger \\\n", + "Review_Title \n", + " 1 sweet R32 0.018501 0.112994 \n", + " 2002 Trans Am/Sunset Orange Metallic 0.032842 0.038908 \n", + " 42 days of driving 8 days in the shop 0.010082 0.082325 \n", + " A great little car 0.015218 0.039688 \n", + " AWESOME FUN MY LITTLE TIGER 0.001452 0.024782 \n", + " I LOVE my Focus 0.008124 0.066092 \n", + " Looks Good But Hunk Of Junk 0.050494 0.116835 \n", + " Mr TACOMA 0.023124 0.030344 \n", + " Veracruz 0.012344 0.054493 \n", + " You will pay for that warranty 0.021192 0.119030 \n", "\n", - " Rating\\r Car_Make \\\n", - "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 2.375 infiniti \n", - " A Dream 4.875 BMW \n", - " A Wonderful Ownership Experiance 4.750 lincoln \n", - " Best truck ever 5.000 Chevrolet \n", - " Even better than the Chevy 5.000 GMC \n", - " I LOVE MY SLK 5.000 mercedes-benz \n", - " I love my Caliber 4.750 dodge \n", - " Looks Good But Hunk Of Junk 2.875 maserati \n", - " Small Reliable Gas Saver! 4.500 mitsubishi \n", - " Veracruz 4.750 hyundai \n", + " Rating\\r Car_Make \\\n", + "Review_Title \n", + " 1 sweet R32 4.875 Volkswagen \n", + " 2002 Trans Am/Sunset Orange Metallic 4.625 pontiac \n", + " 42 days of driving 8 days in the shop 3.375 chrysler \n", + " A great little car 4.875 kia \n", + " AWESOME FUN MY LITTLE TIGER 5.000 fiat \n", + " I LOVE my Focus 4.750 ford \n", + " Looks Good But Hunk Of Junk 2.875 maserati \n", + " Mr TACOMA 5.000 Toyota \n", + " Veracruz 4.750 hyundai \n", + " You will pay for that warranty 2.750 kia \n", "\n", - " Review_Content \n", - "Review_Title \n", - " 2010 M 35 Acceleration/Braking Defects 2010 M 35 Acceleration/Braking Defects: I leased a new 2010 M 35. This was my 9th Nissan/Infinity vehicle since 1996. This vehicle has been a huge disappointment. The vehicle has demonstrated low speed and high speed rpm increases while downs shifting. 
At high speeds, this is causing additional wear and tear on the drive train as evidenced by a large clanking noise. At low speeds, this is causing excess wear and tear on brakes. In both situations, it poses a safety hazard. Nissan/Infinity claims this is within the normal operating range, but the problem is significant and widespread as evidenced by Nissan/Infinity's own service people and engineers. Infinity will be forced to address this very soon or face a Toyota situation. \n", - " A Dream A Dream: Bought this vehicle for myself with 97000 miles and fully loaded with no apparent deficits (one owner vehicle). Body, engine, paint, interior, exterior in immaculate condition. Have driven it on long and short trips and it continues to elicit stares from lexus and benz owners. This vehicle is made for driving!! \n", - " A Wonderful Ownership Experiance A Wonderful Ownership Experiance: Purchased the car with 20,000 miles on \\rit. I drive every day and I am not \\reasy on a car. My 98 Town Car \\rSignature has been truly a wonderful \\rcar and most of all I have had not one \\rmajor problem with the car and any \\rvisit to the Lincoln service \\rdepartment has only given me great \\rtreatment.The car is always reliable \\rand built tough, I am approaching \\r110,000 miles on it and I have got the \\rurge to get something new, however \\rthis car has been so good to me I \\rdon't want to trade it for something \\rwith problems. I would recomend this \\rcar for anyone who desires a luxury \\rride and reliability. \n", - " Best truck ever Best truck ever: very reliable well rounded truck. \n", - " Even better than the Chevy Even better than the Chevy: This is the best truck I have ever \\rowned. This is my first Full Size, but \\rmy dad has a silverado. My GMC drives \\rjust a little bit better,is a little \\rbit quiter, and the exterior styling \\ris the classiest out of all the trucks \\rin the market. 
Nobody could talk me \\rout of this truck and into anything \\relse. Eventhough the 2004 Ford \\rinterior has a good design, you still \\rcouldn't pay me enough to own a FORD. \\rThe editor that rated the Sierra lost \\rthere mind, it should be alot higher \\rthan the low 8's. The only thing I \\rregret is not getting the Z71 pkg. \n", - " I LOVE MY SLK I LOVE MY SLK: It is fun driving top down or up but I will srive it top down more weather permitting. It get's pretty darned hot here in Phoenix AZ in July. \n", - " I love my Caliber I love my Caliber: I just bought my Caliber this week So far it has been a blast to drive. I was so surprised how much pep it's got for a small engine. It's also very smooth, you don't even feel the gear shift, I love that. I also love the way it feels like an SUV but looks like a sporty car. So far I love everything about it. \n", - " Looks Good But Hunk Of Junk Looks Good But Hunk Of Junk: This car is strictly \"looks only\", it is not reliable or even close to it.I have already sank $13,760 in repairs at only 23K miles.This is totally unacceptable for a $140K car when new.I am taking it to the auction next week to \"unload\" before it can empty my wallet again.But if you want a sharp car that sits good in the driveway - this is it!Just don't drive it anywhere!! \n", - " Small Reliable Gas Saver! Small Reliable Gas Saver!: Purchase car some 166,000 miles ago and have enjoyed its reliability, gas savings, and small car comfort. Yes, it does hesitate on steep hills (only has a 1.5 engine), but it makes the hills. I have traveled from Florida to Nevada, Texas, Oklahoma, Missouri, and now live in Pennsylvania. I make trips to New York and Massachusetts. I do not tire in driving this car. Maintenance has not been a major issue. Yes, I have changed the struts, brakes - but that is to be expected with its mileage. I do recommend consideration of this vehicle for college or young family starting out in life. 
It has more than paid for its purchase price many, many times. Enjoy. \n", - " Veracruz Veracruz: This is a crossover with the ride of a cruse ship. The car has so many bells and whistles. Have it one week and already over 1100 miles. Finding wonderful things about it every day. Could be the best car ever. " + " Review_Content \n", + "Review_Title \n", + " 1 sweet R32 1 sweet R32: I was looking into buying a Subaru WRX \\rSTI, but after two test drives in each \\rand reading as many \\rRoad&Track,Car&Driver,and any other \\rinfo I could find I desided to go with \\rthe R32. I traded in my 2003 GTI VR6 \\rthat had 29,000 miles on it. That was a \\rgreat car but this is a whole new \\rbeast. Once you own an all wheel drive \\rthere is just no going back. This car \\rhandles like a dream, the seats are the \\rbest I've ever been in. Cabin is put \\rtogether very well and the pipes are \\rcrazy. The climit control is awsome, \\rheated seats are so sweet on those cold \\rwinter days. I live in the central \\rvalley of California so these tire are \\rthe best. If there was one thing I \\rwould change(give me a spare tire)!!!!! \n", + " 2002 Trans Am/Sunset Orange Metallic 2002 Trans Am/Sunset Orange Metallic: This Is Pontiac's most exciting vehicle \\rof all time.It has so much performance \\rthat it is a big disapointment that it \\rwill be discontinued this year.The only \\rarea that this vehicle does not excell \\rin would be the fuel economy \\rdepartment.I guess that if you can \\rafford one of these dream cars, you \\rreally dony worry about how far it will \\rtravel on a tankfull of gas. \n", + " 42 days of driving 8 days in the shop 42 days of driving 8 days in the shop : I was given the sebring for my 20th wedding anniversary. I have been in love with it for years and finally got it. After 42 days I blew most of the electrical system. It has been at the dealer for 8 days and they can not find the problem. Right now I am not very happy. 
\n", + " A great little car A great little car: Bought my Spectra about one year ago, currently has about 18,000 miles on it. I have had absolutely no problems with it. I had cruise control added at the time of purchase, other than that it's stock. This is my daily driver, it's comfortable, reliable and gets decent mileage. The Spectra happens to be my second Kia, I have a Sedona van that has been to the dealer several times (however everything was covered by the warranty) it currently has 58,000 miles on it. The Spectra's a great handling car. \n", + " AWESOME FUN MY LITTLE TIGER AWESOME FUN MY LITTLE TIGER: Abarth is ultimately more fun than my old mustang or Z a little power house that doesn't shy away from a fight love the engine growl and the kick more room than you think awesome bang for the buck .Fun the most fun than any car I have ever own worth every penny a pleasure to drive. \n", + " I LOVE my Focus I LOVE my Focus: I LOVE my Focus. I've had it about 2 \\ryears. It drives great, looks good, \\rgets great gas milage and never slows \\rdown. I'm even thinking of getting \\ranother one on my next car purchase! \n", + " Looks Good But Hunk Of Junk Looks Good But Hunk Of Junk: This car is strictly \"looks only\", it is not reliable or even close to it.I have already sank $13,760 in repairs at only 23K miles.This is totally unacceptable for a $140K car when new.I am taking it to the auction next week to \"unload\" before it can empty my wallet again.But if you want a sharp car that sits good in the driveway - this is it!Just don't drive it anywhere!! \n", + " Mr TACOMA Mr TACOMA: Great truck. The Handling is pretty \\rnice and the engine is stronger. The V6 \\rwith 3100 pounds can really make this \\rtruck move. \n", + " Veracruz Veracruz: This is a crossover with the ride of a cruse ship. The car has so many bells and whistles. Have it one week and already over 1100 miles. Finding wonderful things about it every day. Could be the best car ever. 
\n", + " You will pay for that warranty You will pay for that warranty: Own a 2002 KIA Sedona EX. I complained about lights going dim while under warranty. Kia checked, said everything within parameters. Guess what, 3000 miles out of warranty alternator died. KIA says it's on you now. 63,000 miles and they want $565.00 to repair; that includes alternator, belts and labor. It's not a repair you can do either, seems AC lines are in the way. Do you think KIA planned it? Ask them about changing spark plugs the rear 3, seems you need to remove the air intake manifold? That will require new gaskets? Not sure of that cost. I hope to dump this Sedona by then! Think twice before you buy, they will get you to pay for that supposedly free 5year/60000 bumper to bumper warranty. RIPOFF. " ] }, "execution_count": 47, @@ -5065,55 +4612,56 @@ "data": { "text/plain": [ "Car_Make\n", - "AMGeneral 3\n", - "Acura 137\n", - "AlfaRomeo 58\n", - "AstonMartin 65\n", - "Audi 141\n", - "BMW 149\n", - "Bentley 101\n", - "Bugatti 8\n", - "Buick 132\n", - "Cadillac 157\n", - "Chevrolet 146\n", - "GMC 145\n", - "Honda 147\n", - "Toyota 145\n", - "Volkswagen 140\n", - "chrysler 127\n", + "AMGeneral 2\n", + "Acura 154\n", + "AlfaRomeo 60\n", + "AstonMartin 55\n", + "Audi 144\n", + "BMW 143\n", + "Bentley 102\n", + "Bugatti 7\n", + "Buick 134\n", + "Cadillac 140\n", + "Chevrolet 157\n", + "GMC 133\n", + "Honda 143\n", + "Toyota 130\n", + "Volkswagen 152\n", + "chrysler 136\n", "dodge 142\n", - "ferrari 119\n", - "fiat 136\n", - "ford 142\n", - "genesis 56\n", - "hummer 144\n", - "hyundai 130\n", - "infiniti 130\n", - "isuzu 131\n", - "jaguar 126\n", - "jeep 131\n", - "kia 127\n", - "lamborghini 60\n", - "land-rover 145\n", - "lexus 133\n", - "lincoln 133\n", - "lotus 97\n", - "maserati 139\n", - "maybach 11\n", - "mazda 146\n", - "mercedes-benz 129\n", - "mercury 126\n", - "mini 143\n", - "mitsubishi 128\n", - "nissan 127\n", - "pontiac 127\n", - "porsche 140\n", - "ram 132\n", - "rolls-royce 
18\n", - "subaru 138\n", - "suzuki 116\n", - "tesla 99\n", - "volvo 130\n", + "ferrari 111\n", + "fiat 142\n", + "ford 138\n", + "genesis 48\n", + "hummer 149\n", + "hyundai 142\n", + "infiniti 134\n", + "isuzu 137\n", + "jaguar 128\n", + "jeep 127\n", + "kia 126\n", + "lamborghini 54\n", + "land-rover 141\n", + "lexus 125\n", + "lincoln 138\n", + "lotus 102\n", + "maserati 136\n", + "maybach 15\n", + "mazda 137\n", + "mclaren 1\n", + "mercedes-benz 133\n", + "mercury 131\n", + "mini 142\n", + "mitsubishi 118\n", + "nissan 125\n", + "pontiac 132\n", + "porsche 136\n", + "ram 152\n", + "rolls-royce 23\n", + "subaru 129\n", + "suzuki 121\n", + "tesla 100\n", + "volvo 135\n", "dtype: int64" ] }, @@ -5154,10 +4702,14 @@ { "cell_type": "code", "execution_count": 52, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "test_set['Predicted_Y'] = Y_pred" + "predicted_y_with_na = np.zeros(len(test_set.index), dtype=object)\n", + "predicted_y_with_na[~test_set.isna().any(axis=1)] = Y_pred\n", + "test_set['Predicted_Y'] = predicted_y_with_na" ] }, { @@ -5190,12 +4742,12 @@ " \n", " \n", " \n", - " sentiment.score\n", " emotion.sadness\n", " emotion.joy\n", " emotion.fear\n", " emotion.disgust\n", " emotion.anger\n", + " sentiment.score\n", " Rating\\r\n", " Predicted_Y\n", " \n", @@ -5225,666 +4777,653 @@ " \n", " \n", " AMGeneral\n", - " -0.594638\n", - " 0.240532\n", - " 0.020233\n", - " 0.209863\n", - " 0.148217\n", - " 0.280780\n", - " 3.000000\n", - " 3.194602\n", + " 0.233502\n", + " 0.416527\n", + " 0.149416\n", + " 0.030530\n", + " 0.065171\n", + " 0.021626\n", + " 4.833333\n", + " 4.264005\n", " \n", " \n", " Acura\n", - " 0.146076\n", - " 0.216153\n", - " 0.310447\n", - " 0.112656\n", - " 0.088328\n", - " 0.118972\n", - " 4.209746\n", - " 4.227164\n", + " 0.186803\n", + " 0.467307\n", + " 0.134082\n", + " 0.020744\n", + " 0.064254\n", + " 0.330192\n", + " 4.538690\n", + " 4.455716\n", " \n", " \n", " AlfaRomeo\n", - " 0.160632\n", - " 
0.232433\n", - " 0.315023\n", - " 0.125210\n", - " 0.082053\n", - " 0.122382\n", - " 4.388889\n", - " 4.322972\n", + " 0.179048\n", + " 0.434307\n", + " 0.101048\n", + " 0.030507\n", + " 0.088323\n", + " 0.268080\n", + " 4.187500\n", + " 4.435494\n", " \n", " \n", " AstonMartin\n", - " 0.400920\n", - " 0.201064\n", - " 0.362957\n", - " 0.114105\n", - " 0.061657\n", - " 0.115535\n", - " 4.326087\n", - " 4.448320\n", + " 0.161465\n", + " 0.532924\n", + " 0.093979\n", + " 0.030943\n", + " 0.063027\n", + " 0.470149\n", + " 4.613636\n", + " 4.631018\n", " \n", " \n", " Audi\n", - " 0.252668\n", - " 0.195495\n", - " 0.330413\n", - " 0.106549\n", - " 0.059369\n", - " 0.103690\n", - " 4.445755\n", - " 4.365891\n", + " 0.196609\n", + " 0.490965\n", + " 0.092430\n", + " 0.021402\n", + " 0.059860\n", + " 0.303431\n", + " 4.453431\n", + " 4.421119\n", " \n", " \n", " BMW\n", - " 0.159408\n", - " 0.194169\n", - " 0.368360\n", - " 0.120398\n", - " 0.061844\n", - " 0.123578\n", - " 4.513889\n", - " 4.293852\n", + " 0.191940\n", + " 0.474563\n", + " 0.086499\n", + " 0.024038\n", + " 0.072675\n", + " 0.243201\n", + " 4.468750\n", + " 4.354955\n", " \n", " \n", " Bentley\n", - " 0.307778\n", - " 0.172982\n", - " 0.371687\n", - " 0.110235\n", - " 0.078887\n", - " 0.130370\n", - " 4.229730\n", - " 4.410651\n", + " 0.187771\n", + " 0.528449\n", + " 0.089768\n", + " 0.028513\n", + " 0.057980\n", + " 0.441103\n", + " 4.239583\n", + " 4.57587\n", " \n", " \n", " Bugatti\n", - " -0.111498\n", - " 0.147069\n", - " 0.482210\n", - " 0.122855\n", - " 0.072881\n", - " 0.097407\n", - " 4.250000\n", - " 4.382092\n", + " 0.188314\n", + " 0.587727\n", + " 0.055366\n", + " 0.023677\n", + " 0.054105\n", + " 0.430882\n", + " 4.750000\n", + " 4.718716\n", " \n", " \n", " Buick\n", - " 0.240515\n", - " 0.232104\n", - " 0.322004\n", - " 0.117273\n", - " 0.075984\n", - " 0.112750\n", - " 4.292373\n", - " 4.254791\n", + " 0.260452\n", + " 0.392718\n", + " 0.099129\n", + " 0.025511\n", + " 0.086554\n", + " 
0.088404\n", + " 4.162736\n", + " 4.075432\n", " \n", " \n", " Cadillac\n", - " 0.083118\n", - " 0.212384\n", - " 0.282956\n", - " 0.120468\n", - " 0.074504\n", - " 0.128036\n", - " 4.112500\n", - " 4.160872\n", + " 0.218803\n", + " 0.429976\n", + " 0.102534\n", + " 0.030479\n", + " 0.069962\n", + " 0.274075\n", + " 4.395408\n", + " 4.335975\n", " \n", " \n", " Chevrolet\n", - " -0.042871\n", - " 0.293122\n", - " 0.217664\n", - " 0.131120\n", - " 0.075338\n", - " 0.136536\n", - " 4.103125\n", - " 3.971316\n", + " 0.200984\n", + " 0.441252\n", + " 0.102457\n", + " 0.037943\n", + " 0.074343\n", + " 0.173096\n", + " 4.104730\n", + " 4.23044\n", " \n", " \n", " GMC\n", - " 0.117682\n", - " 0.215651\n", - " 0.295742\n", - " 0.117902\n", - " 0.059368\n", - " 0.115138\n", - " 4.084302\n", - " 4.206823\n", + " 0.218755\n", + " 0.416075\n", + " 0.111135\n", + " 0.033378\n", + " 0.067579\n", + " 0.071438\n", + " 4.089912\n", + " 4.138499\n", " \n", " \n", " Honda\n", - " 0.034251\n", - " 0.233150\n", - " 0.266702\n", - " 0.132316\n", - " 0.061904\n", - " 0.138990\n", - " 4.255435\n", - " 4.059390\n", + " 0.191337\n", + " 0.418301\n", + " 0.117569\n", + " 0.029766\n", + " 0.063076\n", + " 0.118789\n", + " 3.832386\n", + " 4.130003\n", " \n", " \n", " Toyota\n", - " 0.145704\n", - " 0.212612\n", - " 0.327585\n", - " 0.104289\n", - " 0.060387\n", - " 0.109918\n", - " 4.222826\n", - " 4.311795\n", + " 0.199025\n", + " 0.431638\n", + " 0.104728\n", + " 0.027125\n", + " 0.070333\n", + " 0.154561\n", + " 4.350543\n", + " 4.243079\n", " \n", " \n", " Volkswagen\n", - " 0.005947\n", - " 0.259367\n", - " 0.276090\n", - " 0.128501\n", - " 0.073617\n", - " 0.139398\n", - " 4.043478\n", - " 3.980834\n", + " 0.190767\n", + " 0.429078\n", + " 0.126892\n", + " 0.031965\n", + " 0.071180\n", + " 0.130390\n", + " 4.396875\n", + " 4.17746\n", " \n", " \n", " chrysler\n", - " 0.063154\n", - " 0.222972\n", - " 0.320289\n", - " 0.118277\n", - " 0.077105\n", - " 0.126091\n", - " 3.994898\n", - " 
4.036067\n", + " 0.234828\n", + " 0.398700\n", + " 0.116330\n", + " 0.035023\n", + " 0.075663\n", + " 0.086065\n", + " 4.140957\n", + " 4.168451\n", " \n", " \n", " dodge\n", - " 0.149016\n", - " 0.228312\n", - " 0.319779\n", - " 0.117976\n", - " 0.061401\n", - " 0.115340\n", - " 4.259146\n", - " 4.250613\n", + " 0.218513\n", + " 0.408767\n", + " 0.119761\n", + " 0.026407\n", + " 0.076249\n", + " 0.100277\n", + " 4.133929\n", + " 4.163826\n", " \n", " \n", " ferrari\n", - " 0.486829\n", - " 0.144508\n", - " 0.437605\n", - " 0.095123\n", - " 0.055691\n", - " 0.092777\n", - " 4.684211\n", - " 4.588468\n", + " 0.159649\n", + " 0.539798\n", + " 0.108343\n", + " 0.019731\n", + " 0.082763\n", + " 0.463863\n", + " 4.767241\n", + " 4.530158\n", " \n", " \n", " fiat\n", - " 0.127402\n", - " 0.228702\n", - " 0.359161\n", - " 0.124966\n", - " 0.065926\n", - " 0.135856\n", - " 3.957500\n", - " 4.083914\n", + " 0.203202\n", + " 0.401303\n", + " 0.100537\n", + " 0.030897\n", + " 0.076235\n", + " 0.087065\n", + " 3.818878\n", + " 4.134359\n", " \n", " \n", " ford\n", - " 0.244159\n", - " 0.206878\n", - " 0.331643\n", - " 0.109789\n", - " 0.072558\n", - " 0.116549\n", - " 4.267857\n", - " 4.332619\n", + " 0.238288\n", + " 0.362188\n", + " 0.121460\n", + " 0.028063\n", + " 0.088316\n", + " 0.078135\n", + " 4.040094\n", + " 4.005134\n", " \n", " \n", " genesis\n", - " 0.194682\n", - " 0.185214\n", - " 0.281426\n", - " 0.118825\n", - " 0.056142\n", - " 0.144689\n", - " 4.076923\n", - " 4.263187\n", + " 0.211237\n", + " 0.430926\n", + " 0.078043\n", + " 0.031972\n", + " 0.057856\n", + " 0.156253\n", + " 4.608696\n", + " 4.316763\n", " \n", " \n", " hummer\n", - " 0.188878\n", - " 0.193048\n", - " 0.345909\n", - " 0.109266\n", - " 0.057429\n", - " 0.106301\n", - " 4.321429\n", - " 4.332991\n", + " 0.181750\n", + " 0.502888\n", + " 0.126320\n", + " 0.027008\n", + " 0.055950\n", + " 0.297203\n", + " 4.404605\n", + " 4.462752\n", " \n", " \n", " hyundai\n", - " 0.065004\n", - " 
0.264548\n", - " 0.307687\n", - " 0.115017\n", - " 0.057827\n", - " 0.136106\n", - " 4.082500\n", - " 4.064445\n", + " 0.220990\n", + " 0.393721\n", + " 0.096735\n", + " 0.027251\n", + " 0.079915\n", + " 0.161732\n", + " 4.109375\n", + " 4.14899\n", " \n", " \n", " infiniti\n", - " 0.341308\n", - " 0.182078\n", - " 0.344738\n", - " 0.116468\n", - " 0.059517\n", - " 0.111160\n", - " 4.553977\n", - " 4.423686\n", + " 0.200538\n", + " 0.469661\n", + " 0.090667\n", + " 0.024671\n", + " 0.059761\n", + " 0.322187\n", + " 4.566860\n", + " 4.393907\n", " \n", " \n", " isuzu\n", - " 0.075290\n", - " 0.219361\n", - " 0.266676\n", - " 0.126747\n", - " 0.066575\n", - " 0.114220\n", - " 4.309211\n", - " 4.228860\n", + " 0.201127\n", + " 0.404813\n", + " 0.122677\n", + " 0.028856\n", + " 0.101334\n", + " 0.205943\n", + " 4.220238\n", + " 4.306578\n", " \n", " \n", " jaguar\n", - " 0.253660\n", - " 0.178868\n", - " 0.378857\n", - " 0.095227\n", - " 0.061614\n", - " 0.106031\n", - " 4.439815\n", - " 4.317453\n", + " 0.163703\n", + " 0.556705\n", + " 0.086785\n", + " 0.025675\n", + " 0.055537\n", + " 0.375661\n", + " 4.584091\n", + " 4.497573\n", " \n", " \n", " jeep\n", - " 0.077878\n", - " 0.244649\n", - " 0.344078\n", - " 0.111979\n", - " 0.063442\n", - " 0.115510\n", - " 3.899554\n", - " 4.093507\n", + " 0.253255\n", + " 0.396047\n", + " 0.104165\n", + " 0.025074\n", + " 0.079216\n", + " 0.106891\n", + " 4.108607\n", + " 4.15378\n", " \n", " \n", " kia\n", - " 0.253933\n", - " 0.198484\n", - " 0.367603\n", - " 0.104703\n", - " 0.066781\n", - " 0.111330\n", - " 4.219444\n", - " 4.330955\n", + " 0.266675\n", + " 0.394792\n", + " 0.111849\n", + " 0.025904\n", + " 0.070382\n", + " 0.141621\n", + " 4.141827\n", + " 4.12064\n", " \n", " \n", " lamborghini\n", - " 0.412362\n", - " 0.091284\n", - " 0.396143\n", - " 0.085355\n", - " 0.062690\n", - " 0.083136\n", - " 4.613636\n", - " 4.633120\n", + " 0.127094\n", + " 0.629176\n", + " 0.082221\n", + " 0.029788\n", + " 0.052919\n", + " 
0.657044\n", + " 4.725000\n", + " 4.665769\n", " \n", " \n", " land-rover\n", - " 0.244489\n", - " 0.202602\n", - " 0.334492\n", - " 0.120057\n", - " 0.058127\n", - " 0.101070\n", - " 4.154762\n", - " 4.383457\n", + " 0.274524\n", + " 0.355115\n", + " 0.109827\n", + " 0.034846\n", + " 0.086169\n", + " 0.080642\n", + " 3.848837\n", + " 4.0177\n", " \n", " \n", " lexus\n", - " 0.200350\n", - " 0.183309\n", - " 0.344489\n", - " 0.109256\n", - " 0.050643\n", - " 0.120852\n", - " 4.349432\n", - " 4.253620\n", + " 0.202997\n", + " 0.439800\n", + " 0.100562\n", + " 0.031977\n", + " 0.076374\n", + " 0.231606\n", + " 4.306122\n", + " 4.294433\n", " \n", " \n", " lincoln\n", - " 0.291578\n", - " 0.201160\n", - " 0.330980\n", - " 0.125137\n", - " 0.069068\n", - " 0.122796\n", - " 4.494318\n", - " 4.392293\n", + " 0.213433\n", + " 0.466320\n", + " 0.112260\n", + " 0.027192\n", + " 0.075736\n", + " 0.163414\n", + " 4.269231\n", + " 4.264042\n", " \n", " \n", " lotus\n", - " 0.249474\n", - " 0.186374\n", - " 0.368691\n", - " 0.135556\n", - " 0.063595\n", - " 0.132431\n", - " 4.590909\n", - " 4.478621\n", + " 0.158467\n", + " 0.457916\n", + " 0.137350\n", + " 0.023458\n", + " 0.080445\n", + " 0.307135\n", + " 4.702381\n", + " 4.490183\n", " \n", " \n", " maserati\n", - " 0.250559\n", - " 0.219400\n", - " 0.398471\n", - " 0.101808\n", - " 0.060194\n", - " 0.111714\n", - " 4.439103\n", - " 4.319154\n", + " 0.174925\n", + " 0.523992\n", + " 0.087822\n", + " 0.037616\n", + " 0.071795\n", + " 0.311256\n", + " 4.431250\n", + " 4.394369\n", " \n", " \n", " maybach\n", - " 0.646201\n", - " 0.113256\n", - " 0.601533\n", - " 0.047746\n", - " 0.043509\n", - " 0.071882\n", - " 4.732143\n", - " 4.710683\n", + " 0.178194\n", + " 0.515520\n", + " 0.077657\n", + " 0.015687\n", + " 0.072357\n", + " 0.633714\n", + " 4.958333\n", + " 4.733771\n", " \n", " \n", " mazda\n", - " 0.187200\n", - " 0.202740\n", - " 0.345586\n", - " 0.108034\n", - " 0.075693\n", - " 0.109664\n", - " 4.346429\n", - " 
4.247244\n", - " \n", - " \n", - " mclaren\n", - " 0.781623\n", - " 0.176243\n", - " 0.331202\n", - " 0.111601\n", - " 0.068376\n", - " 0.115666\n", - " 5.000000\n", - " 4.694830\n", + " 0.203442\n", + " 0.444971\n", + " 0.111021\n", + " 0.026170\n", + " 0.062387\n", + " 0.230268\n", + " 4.479651\n", + " 4.318612\n", " \n", " \n", " mercedes-benz\n", - " 0.253613\n", - " 0.212727\n", - " 0.314652\n", - " 0.103435\n", - " 0.057229\n", - " 0.122127\n", - " 4.500000\n", - " 4.275059\n", + " 0.227015\n", + " 0.387488\n", + " 0.105910\n", + " 0.026781\n", + " 0.085045\n", + " 0.091385\n", + " 4.095745\n", + " 4.094125\n", " \n", " \n", " mercury\n", - " 0.223070\n", - " 0.226492\n", - " 0.308719\n", - " 0.116383\n", - " 0.060396\n", - " 0.110053\n", - " 4.466837\n", - " 4.308947\n", + " 0.200664\n", + " 0.462219\n", + " 0.105958\n", + " 0.025067\n", + " 0.063838\n", + " 0.246360\n", + " 4.311224\n", + " 4.390451\n", " \n", " \n", " mini\n", - " 0.178291\n", - " 0.214415\n", - " 0.391739\n", - " 0.117223\n", - " 0.063220\n", - " 0.149629\n", - " 4.161932\n", - " 4.272955\n", + " 0.218531\n", + " 0.443708\n", + " 0.096154\n", + " 0.026429\n", + " 0.071672\n", + " 0.167861\n", + " 4.036184\n", + " 4.190949\n", " \n", " \n", " mitsubishi\n", - " 0.306894\n", - " 0.174691\n", - " 0.380305\n", - " 0.098874\n", - " 0.058135\n", - " 0.103309\n", - " 4.517045\n", - " 4.431931\n", + " 0.175781\n", + " 0.481554\n", + " 0.118347\n", + " 0.025169\n", + " 0.070454\n", + " 0.300219\n", + " 4.346698\n", + " 4.417798\n", " \n", " \n", " nissan\n", - " 0.047395\n", - " 0.264765\n", - " 0.244476\n", - " 0.135122\n", - " 0.064131\n", - " 0.135230\n", - " 3.986364\n", - " 4.001050\n", + " 0.241268\n", + " 0.373916\n", + " 0.111114\n", + " 0.034341\n", + " 0.079956\n", + " 0.102161\n", + " 4.247093\n", + " 4.119348\n", " \n", " \n", " pontiac\n", - " 0.198725\n", - " 0.202958\n", - " 0.363313\n", - " 0.118067\n", - " 0.065794\n", - " 0.110013\n", - " 4.366848\n", - " 4.285907\n", + " 
0.190257\n", + " 0.430994\n", + " 0.110610\n", + " 0.026520\n", + " 0.078449\n", + " 0.165777\n", + " 4.375000\n", + " 4.221752\n", " \n", " \n", " porsche\n", - " 0.336036\n", - " 0.167812\n", - " 0.410677\n", - " 0.099369\n", - " 0.062932\n", - " 0.110313\n", - " 4.500000\n", - " 4.463378\n", + " 0.145226\n", + " 0.510727\n", + " 0.093447\n", + " 0.026101\n", + " 0.080697\n", + " 0.382931\n", + " 4.662500\n", + " 4.552274\n", " \n", " \n", " ram\n", - " 0.049370\n", - " 0.274420\n", - " 0.316925\n", - " 0.117071\n", - " 0.055857\n", - " 0.120195\n", - " 3.731818\n", - " 4.006690\n", + " 0.240626\n", + " 0.367544\n", + " 0.108349\n", + " 0.040745\n", + " 0.076267\n", + " 0.000294\n", + " 3.861111\n", + " 4.113366\n", " \n", " \n", " rolls-royce\n", - " 0.384668\n", - " 0.147421\n", - " 0.450735\n", - " 0.107796\n", - " 0.072940\n", - " 0.087974\n", - " 4.375000\n", - " 4.476755\n", + " 0.260943\n", + " 0.412009\n", + " 0.080448\n", + " 0.037039\n", + " 0.072188\n", + " 0.321649\n", + " 4.843750\n", + " 4.508778\n", " \n", " \n", " subaru\n", - " 0.157244\n", - " 0.206887\n", - " 0.325066\n", - " 0.127740\n", - " 0.067235\n", - " 0.122245\n", - " 4.104167\n", - " 4.261672\n", + " 0.202121\n", + " 0.470115\n", + " 0.103731\n", + " 0.020184\n", + " 0.070802\n", + " 0.301044\n", + " 4.257212\n", + " 4.327014\n", " \n", " \n", " suzuki\n", - " 0.187571\n", - " 0.239506\n", - " 0.297699\n", - " 0.122788\n", - " 0.056956\n", - " 0.111634\n", - " 4.210366\n", - " 4.307377\n", + " 0.206990\n", + " 0.410432\n", + " 0.111564\n", + " 0.033440\n", + " 0.075242\n", + " 0.114764\n", + " 4.235119\n", + " 4.255248\n", " \n", " \n", " tesla\n", - " 0.085558\n", - " 0.238399\n", - " 0.334405\n", - " 0.095213\n", - " 0.051772\n", - " 0.125053\n", - " 4.194853\n", - " 4.303805\n", + " 0.296216\n", + " 0.379065\n", + " 0.064811\n", + " 0.024911\n", + " 0.066818\n", + " 0.154607\n", + " 4.673387\n", + " 4.284923\n", " \n", " \n", " volvo\n", - " 0.022950\n", - " 0.252460\n", - " 
0.279139\n", - " 0.143669\n", - " 0.062025\n", - " 0.130268\n", - " 4.140957\n", - " 4.079470\n", + " 0.204267\n", + " 0.433600\n", + " 0.113805\n", + " 0.024111\n", + " 0.071134\n", + " 0.220652\n", + " 4.380814\n", + " 4.281185\n", " \n", " \n", "\n", "" ], "text/plain": [ - " sentiment.score emotion.sadness emotion.joy emotion.fear \\\n", - " mean mean mean mean \n", + " emotion.sadness emotion.joy emotion.fear emotion.disgust \\\n", + " mean mean mean mean \n", "Car_Make \n", - "AMGeneral -0.594638 0.240532 0.020233 0.209863 \n", - "Acura 0.146076 0.216153 0.310447 0.112656 \n", - "AlfaRomeo 0.160632 0.232433 0.315023 0.125210 \n", - "AstonMartin 0.400920 0.201064 0.362957 0.114105 \n", - "Audi 0.252668 0.195495 0.330413 0.106549 \n", - "BMW 0.159408 0.194169 0.368360 0.120398 \n", - "Bentley 0.307778 0.172982 0.371687 0.110235 \n", - "Bugatti -0.111498 0.147069 0.482210 0.122855 \n", - "Buick 0.240515 0.232104 0.322004 0.117273 \n", - "Cadillac 0.083118 0.212384 0.282956 0.120468 \n", - "Chevrolet -0.042871 0.293122 0.217664 0.131120 \n", - "GMC 0.117682 0.215651 0.295742 0.117902 \n", - "Honda 0.034251 0.233150 0.266702 0.132316 \n", - "Toyota 0.145704 0.212612 0.327585 0.104289 \n", - "Volkswagen 0.005947 0.259367 0.276090 0.128501 \n", - "chrysler 0.063154 0.222972 0.320289 0.118277 \n", - "dodge 0.149016 0.228312 0.319779 0.117976 \n", - "ferrari 0.486829 0.144508 0.437605 0.095123 \n", - "fiat 0.127402 0.228702 0.359161 0.124966 \n", - "ford 0.244159 0.206878 0.331643 0.109789 \n", - "genesis 0.194682 0.185214 0.281426 0.118825 \n", - "hummer 0.188878 0.193048 0.345909 0.109266 \n", - "hyundai 0.065004 0.264548 0.307687 0.115017 \n", - "infiniti 0.341308 0.182078 0.344738 0.116468 \n", - "isuzu 0.075290 0.219361 0.266676 0.126747 \n", - "jaguar 0.253660 0.178868 0.378857 0.095227 \n", - "jeep 0.077878 0.244649 0.344078 0.111979 \n", - "kia 0.253933 0.198484 0.367603 0.104703 \n", - "lamborghini 0.412362 0.091284 0.396143 0.085355 \n", - "land-rover 
0.244489 0.202602 0.334492 0.120057 \n", - "lexus 0.200350 0.183309 0.344489 0.109256 \n", - "lincoln 0.291578 0.201160 0.330980 0.125137 \n", - "lotus 0.249474 0.186374 0.368691 0.135556 \n", - "maserati 0.250559 0.219400 0.398471 0.101808 \n", - "maybach 0.646201 0.113256 0.601533 0.047746 \n", - "mazda 0.187200 0.202740 0.345586 0.108034 \n", - "mclaren 0.781623 0.176243 0.331202 0.111601 \n", - "mercedes-benz 0.253613 0.212727 0.314652 0.103435 \n", - "mercury 0.223070 0.226492 0.308719 0.116383 \n", - "mini 0.178291 0.214415 0.391739 0.117223 \n", - "mitsubishi 0.306894 0.174691 0.380305 0.098874 \n", - "nissan 0.047395 0.264765 0.244476 0.135122 \n", - "pontiac 0.198725 0.202958 0.363313 0.118067 \n", - "porsche 0.336036 0.167812 0.410677 0.099369 \n", - "ram 0.049370 0.274420 0.316925 0.117071 \n", - "rolls-royce 0.384668 0.147421 0.450735 0.107796 \n", - "subaru 0.157244 0.206887 0.325066 0.127740 \n", - "suzuki 0.187571 0.239506 0.297699 0.122788 \n", - "tesla 0.085558 0.238399 0.334405 0.095213 \n", - "volvo 0.022950 0.252460 0.279139 0.143669 \n", + "AMGeneral 0.233502 0.416527 0.149416 0.030530 \n", + "Acura 0.186803 0.467307 0.134082 0.020744 \n", + "AlfaRomeo 0.179048 0.434307 0.101048 0.030507 \n", + "AstonMartin 0.161465 0.532924 0.093979 0.030943 \n", + "Audi 0.196609 0.490965 0.092430 0.021402 \n", + "BMW 0.191940 0.474563 0.086499 0.024038 \n", + "Bentley 0.187771 0.528449 0.089768 0.028513 \n", + "Bugatti 0.188314 0.587727 0.055366 0.023677 \n", + "Buick 0.260452 0.392718 0.099129 0.025511 \n", + "Cadillac 0.218803 0.429976 0.102534 0.030479 \n", + "Chevrolet 0.200984 0.441252 0.102457 0.037943 \n", + "GMC 0.218755 0.416075 0.111135 0.033378 \n", + "Honda 0.191337 0.418301 0.117569 0.029766 \n", + "Toyota 0.199025 0.431638 0.104728 0.027125 \n", + "Volkswagen 0.190767 0.429078 0.126892 0.031965 \n", + "chrysler 0.234828 0.398700 0.116330 0.035023 \n", + "dodge 0.218513 0.408767 0.119761 0.026407 \n", + "ferrari 0.159649 0.539798 0.108343 
0.019731 \n", + "fiat 0.203202 0.401303 0.100537 0.030897 \n", + "ford 0.238288 0.362188 0.121460 0.028063 \n", + "genesis 0.211237 0.430926 0.078043 0.031972 \n", + "hummer 0.181750 0.502888 0.126320 0.027008 \n", + "hyundai 0.220990 0.393721 0.096735 0.027251 \n", + "infiniti 0.200538 0.469661 0.090667 0.024671 \n", + "isuzu 0.201127 0.404813 0.122677 0.028856 \n", + "jaguar 0.163703 0.556705 0.086785 0.025675 \n", + "jeep 0.253255 0.396047 0.104165 0.025074 \n", + "kia 0.266675 0.394792 0.111849 0.025904 \n", + "lamborghini 0.127094 0.629176 0.082221 0.029788 \n", + "land-rover 0.274524 0.355115 0.109827 0.034846 \n", + "lexus 0.202997 0.439800 0.100562 0.031977 \n", + "lincoln 0.213433 0.466320 0.112260 0.027192 \n", + "lotus 0.158467 0.457916 0.137350 0.023458 \n", + "maserati 0.174925 0.523992 0.087822 0.037616 \n", + "maybach 0.178194 0.515520 0.077657 0.015687 \n", + "mazda 0.203442 0.444971 0.111021 0.026170 \n", + "mercedes-benz 0.227015 0.387488 0.105910 0.026781 \n", + "mercury 0.200664 0.462219 0.105958 0.025067 \n", + "mini 0.218531 0.443708 0.096154 0.026429 \n", + "mitsubishi 0.175781 0.481554 0.118347 0.025169 \n", + "nissan 0.241268 0.373916 0.111114 0.034341 \n", + "pontiac 0.190257 0.430994 0.110610 0.026520 \n", + "porsche 0.145226 0.510727 0.093447 0.026101 \n", + "ram 0.240626 0.367544 0.108349 0.040745 \n", + "rolls-royce 0.260943 0.412009 0.080448 0.037039 \n", + "subaru 0.202121 0.470115 0.103731 0.020184 \n", + "suzuki 0.206990 0.410432 0.111564 0.033440 \n", + "tesla 0.296216 0.379065 0.064811 0.024911 \n", + "volvo 0.204267 0.433600 0.113805 0.024111 \n", "\n", - " emotion.disgust emotion.anger Rating\\r Predicted_Y \n", - " mean mean mean mean \n", + " emotion.anger sentiment.score Rating\\r Predicted_Y \n", + " mean mean mean mean \n", "Car_Make \n", - "AMGeneral 0.148217 0.280780 3.000000 3.194602 \n", - "Acura 0.088328 0.118972 4.209746 4.227164 \n", - "AlfaRomeo 0.082053 0.122382 4.388889 4.322972 \n", - "AstonMartin 0.061657 
0.115535 4.326087 4.448320 \n", - "Audi 0.059369 0.103690 4.445755 4.365891 \n", - "BMW 0.061844 0.123578 4.513889 4.293852 \n", - "Bentley 0.078887 0.130370 4.229730 4.410651 \n", - "Bugatti 0.072881 0.097407 4.250000 4.382092 \n", - "Buick 0.075984 0.112750 4.292373 4.254791 \n", - "Cadillac 0.074504 0.128036 4.112500 4.160872 \n", - "Chevrolet 0.075338 0.136536 4.103125 3.971316 \n", - "GMC 0.059368 0.115138 4.084302 4.206823 \n", - "Honda 0.061904 0.138990 4.255435 4.059390 \n", - "Toyota 0.060387 0.109918 4.222826 4.311795 \n", - "Volkswagen 0.073617 0.139398 4.043478 3.980834 \n", - "chrysler 0.077105 0.126091 3.994898 4.036067 \n", - "dodge 0.061401 0.115340 4.259146 4.250613 \n", - "ferrari 0.055691 0.092777 4.684211 4.588468 \n", - "fiat 0.065926 0.135856 3.957500 4.083914 \n", - "ford 0.072558 0.116549 4.267857 4.332619 \n", - "genesis 0.056142 0.144689 4.076923 4.263187 \n", - "hummer 0.057429 0.106301 4.321429 4.332991 \n", - "hyundai 0.057827 0.136106 4.082500 4.064445 \n", - "infiniti 0.059517 0.111160 4.553977 4.423686 \n", - "isuzu 0.066575 0.114220 4.309211 4.228860 \n", - "jaguar 0.061614 0.106031 4.439815 4.317453 \n", - "jeep 0.063442 0.115510 3.899554 4.093507 \n", - "kia 0.066781 0.111330 4.219444 4.330955 \n", - "lamborghini 0.062690 0.083136 4.613636 4.633120 \n", - "land-rover 0.058127 0.101070 4.154762 4.383457 \n", - "lexus 0.050643 0.120852 4.349432 4.253620 \n", - "lincoln 0.069068 0.122796 4.494318 4.392293 \n", - "lotus 0.063595 0.132431 4.590909 4.478621 \n", - "maserati 0.060194 0.111714 4.439103 4.319154 \n", - "maybach 0.043509 0.071882 4.732143 4.710683 \n", - "mazda 0.075693 0.109664 4.346429 4.247244 \n", - "mclaren 0.068376 0.115666 5.000000 4.694830 \n", - "mercedes-benz 0.057229 0.122127 4.500000 4.275059 \n", - "mercury 0.060396 0.110053 4.466837 4.308947 \n", - "mini 0.063220 0.149629 4.161932 4.272955 \n", - "mitsubishi 0.058135 0.103309 4.517045 4.431931 \n", - "nissan 0.064131 0.135230 3.986364 4.001050 \n", - "pontiac 
0.065794 0.110013 4.366848 4.285907 \n", - "porsche 0.062932 0.110313 4.500000 4.463378 \n", - "ram 0.055857 0.120195 3.731818 4.006690 \n", - "rolls-royce 0.072940 0.087974 4.375000 4.476755 \n", - "subaru 0.067235 0.122245 4.104167 4.261672 \n", - "suzuki 0.056956 0.111634 4.210366 4.307377 \n", - "tesla 0.051772 0.125053 4.194853 4.303805 \n", - "volvo 0.062025 0.130268 4.140957 4.079470 " + "AMGeneral 0.065171 0.021626 4.833333 4.264005 \n", + "Acura 0.064254 0.330192 4.538690 4.455716 \n", + "AlfaRomeo 0.088323 0.268080 4.187500 4.435494 \n", + "AstonMartin 0.063027 0.470149 4.613636 4.631018 \n", + "Audi 0.059860 0.303431 4.453431 4.421119 \n", + "BMW 0.072675 0.243201 4.468750 4.354955 \n", + "Bentley 0.057980 0.441103 4.239583 4.57587 \n", + "Bugatti 0.054105 0.430882 4.750000 4.718716 \n", + "Buick 0.086554 0.088404 4.162736 4.075432 \n", + "Cadillac 0.069962 0.274075 4.395408 4.335975 \n", + "Chevrolet 0.074343 0.173096 4.104730 4.23044 \n", + "GMC 0.067579 0.071438 4.089912 4.138499 \n", + "Honda 0.063076 0.118789 3.832386 4.130003 \n", + "Toyota 0.070333 0.154561 4.350543 4.243079 \n", + "Volkswagen 0.071180 0.130390 4.396875 4.17746 \n", + "chrysler 0.075663 0.086065 4.140957 4.168451 \n", + "dodge 0.076249 0.100277 4.133929 4.163826 \n", + "ferrari 0.082763 0.463863 4.767241 4.530158 \n", + "fiat 0.076235 0.087065 3.818878 4.134359 \n", + "ford 0.088316 0.078135 4.040094 4.005134 \n", + "genesis 0.057856 0.156253 4.608696 4.316763 \n", + "hummer 0.055950 0.297203 4.404605 4.462752 \n", + "hyundai 0.079915 0.161732 4.109375 4.14899 \n", + "infiniti 0.059761 0.322187 4.566860 4.393907 \n", + "isuzu 0.101334 0.205943 4.220238 4.306578 \n", + "jaguar 0.055537 0.375661 4.584091 4.497573 \n", + "jeep 0.079216 0.106891 4.108607 4.15378 \n", + "kia 0.070382 0.141621 4.141827 4.12064 \n", + "lamborghini 0.052919 0.657044 4.725000 4.665769 \n", + "land-rover 0.086169 0.080642 3.848837 4.0177 \n", + "lexus 0.076374 0.231606 4.306122 4.294433 \n", + "lincoln 
0.075736 0.163414 4.269231 4.264042 \n", + "lotus 0.080445 0.307135 4.702381 4.490183 \n", + "maserati 0.071795 0.311256 4.431250 4.394369 \n", + "maybach 0.072357 0.633714 4.958333 4.733771 \n", + "mazda 0.062387 0.230268 4.479651 4.318612 \n", + "mercedes-benz 0.085045 0.091385 4.095745 4.094125 \n", + "mercury 0.063838 0.246360 4.311224 4.390451 \n", + "mini 0.071672 0.167861 4.036184 4.190949 \n", + "mitsubishi 0.070454 0.300219 4.346698 4.417798 \n", + "nissan 0.079956 0.102161 4.247093 4.119348 \n", + "pontiac 0.078449 0.165777 4.375000 4.221752 \n", + "porsche 0.080697 0.382931 4.662500 4.552274 \n", + "ram 0.076267 0.000294 3.861111 4.113366 \n", + "rolls-royce 0.072188 0.321649 4.843750 4.508778 \n", + "subaru 0.070802 0.301044 4.257212 4.327014 \n", + "suzuki 0.075242 0.114764 4.235119 4.255248 \n", + "tesla 0.066818 0.154607 4.673387 4.284923 \n", + "volvo 0.071134 0.220652 4.380814 4.281185 " ] }, "execution_count": 53, @@ -5893,7 +5432,10 @@ } ], "source": [ - "agg_grouped_test_set = test_set.groupby('Car_Make').agg(['mean'])\n", + "agg_grouped_test_set = (\n", + " test_set[sentiment_cols + ['Car_Make', 'Rating\\r', 'Predicted_Y']]\n", + " .groupby('Car_Make')\n", + " .agg(['mean']))\n", "agg_grouped_test_set" ] }, @@ -5949,87 +5491,87 @@ " \n", " \n", " AMGeneral\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", " 2\n", - " 2\n", - " 2\n", - " 2\n", - " 2\n", - " 2\n", - " 2\n", - " 2\n", - " 2\n", + " 3\n", + " 3\n", " \n", " \n", " Acura\n", - " 59\n", - " 59\n", - " 59\n", - " 59\n", - " 59\n", - " 59\n", - " 21\n", - " 59\n", - " 58\n", + " 42\n", + " 42\n", + " 42\n", + " 42\n", + " 42\n", + " 42\n", + " 12\n", + " 42\n", + " 41\n", " \n", " \n", " AlfaRomeo\n", - " 18\n", - " 18\n", - " 18\n", - " 18\n", - " 18\n", - " 18\n", + " 16\n", + " 16\n", + " 16\n", + " 16\n", + " 16\n", + " 16\n", " 4\n", - " 18\n", - " 18\n", + " 16\n", + " 16\n", " \n", " \n", " AstonMartin\n", - " 23\n", - " 23\n", - " 23\n", - " 23\n", - " 23\n", - " 
23\n", - " 12\n", - " 23\n", - " 23\n", + " 33\n", + " 33\n", + " 33\n", + " 33\n", + " 33\n", + " 33\n", + " 10\n", + " 33\n", + " 32\n", " \n", " \n", " Audi\n", - " 53\n", - " 53\n", - " 53\n", - " 53\n", - " 53\n", - " 53\n", - " 16\n", - " 53\n", - " 52\n", + " 51\n", + " 51\n", + " 51\n", + " 51\n", + " 51\n", + " 51\n", + " 15\n", + " 51\n", + " 47\n", " \n", " \n", " BMW\n", - " 45\n", - " 45\n", - " 45\n", - " 45\n", - " 45\n", - " 45\n", - " 12\n", - " 45\n", - " 45\n", + " 48\n", + " 48\n", + " 48\n", + " 48\n", + " 48\n", + " 48\n", + " 16\n", + " 48\n", + " 44\n", " \n", " \n", " Bentley\n", - " 37\n", - " 37\n", - " 37\n", - " 37\n", - " 37\n", - " 37\n", - " 13\n", - " 37\n", - " 37\n", + " 36\n", + " 36\n", + " 36\n", + " 36\n", + " 36\n", + " 36\n", + " 14\n", + " 36\n", + " 32\n", " \n", " \n", " Bugatti\n", @@ -6045,63 +5587,63 @@ " \n", " \n", " Buick\n", - " 59\n", - " 59\n", - " 59\n", - " 59\n", - " 59\n", - " 59\n", - " 18\n", - " 59\n", - " 55\n", + " 53\n", + " 53\n", + " 53\n", + " 53\n", + " 53\n", + " 53\n", + " 19\n", + " 53\n", + " 51\n", " \n", " \n", " Cadillac\n", - " 40\n", - " 40\n", - " 40\n", - " 40\n", - " 40\n", - " 40\n", - " 16\n", - " 40\n", - " 40\n", + " 49\n", + " 49\n", + " 49\n", + " 49\n", + " 49\n", + " 49\n", + " 15\n", + " 49\n", + " 47\n", " \n", " \n", " Chevrolet\n", - " 40\n", - " 40\n", - " 40\n", - " 40\n", - " 40\n", - " 40\n", - " 15\n", - " 40\n", - " 40\n", + " 37\n", + " 37\n", + " 37\n", + " 37\n", + " 37\n", + " 37\n", + " 17\n", + " 37\n", + " 37\n", " \n", " \n", " GMC\n", - " 43\n", - " 43\n", - " 43\n", - " 43\n", - " 43\n", - " 43\n", - " 17\n", - " 43\n", - " 43\n", + " 57\n", + " 57\n", + " 57\n", + " 57\n", + " 57\n", + " 57\n", + " 21\n", + " 57\n", + " 55\n", " \n", " \n", " Honda\n", - " 46\n", - " 46\n", - " 46\n", - " 46\n", - " 46\n", - " 46\n", - " 17\n", - " 46\n", - " 46\n", + " 44\n", + " 44\n", + " 44\n", + " 44\n", + " 44\n", + " 44\n", + " 18\n", + " 44\n", + " 42\n", " \n", " 
\n", " Toyota\n", @@ -6111,297 +5653,285 @@ " 46\n", " 46\n", " 46\n", - " 16\n", - " 46\n", + " 11\n", " 46\n", + " 43\n", " \n", " \n", " Volkswagen\n", - " 46\n", - " 46\n", - " 46\n", - " 46\n", - " 46\n", - " 46\n", - " 18\n", - " 46\n", - " 44\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 15\n", + " 40\n", + " 36\n", " \n", " \n", " chrysler\n", - " 49\n", - " 49\n", - " 49\n", - " 49\n", - " 49\n", - " 49\n", - " 21\n", - " 49\n", - " 48\n", + " 46\n", + " 47\n", + " 47\n", + " 47\n", + " 47\n", + " 47\n", + " 19\n", + " 47\n", + " 47\n", " \n", " \n", " dodge\n", - " 41\n", - " 41\n", - " 41\n", - " 41\n", - " 41\n", - " 41\n", - " 15\n", - " 41\n", - " 41\n", + " 42\n", + " 42\n", + " 42\n", + " 42\n", + " 42\n", + " 42\n", + " 16\n", + " 42\n", + " 39\n", " \n", " \n", " ferrari\n", - " 19\n", - " 19\n", - " 19\n", - " 19\n", - " 19\n", - " 19\n", + " 29\n", + " 29\n", + " 29\n", + " 29\n", + " 29\n", + " 29\n", " 7\n", - " 19\n", - " 19\n", + " 29\n", + " 26\n", " \n", " \n", " fiat\n", - " 50\n", - " 50\n", - " 50\n", - " 50\n", - " 50\n", - " 50\n", - " 11\n", - " 50\n", - " 48\n", - " \n", - " \n", - " ford\n", " 49\n", " 49\n", " 49\n", " 49\n", " 49\n", " 49\n", - " 18\n", + " 11\n", " 49\n", - " 46\n", + " 48\n", + " \n", + " \n", + " ford\n", + " 53\n", + " 53\n", + " 53\n", + " 53\n", + " 53\n", + " 53\n", + " 18\n", + " 53\n", + " 52\n", " \n", " \n", " genesis\n", - " 13\n", - " 13\n", - " 13\n", - " 13\n", - " 13\n", - " 13\n", - " 4\n", - " 13\n", - " 13\n", + " 23\n", + " 23\n", + " 23\n", + " 23\n", + " 23\n", + " 23\n", + " 3\n", + " 23\n", + " 22\n", " \n", " \n", " hummer\n", - " 42\n", - " 42\n", - " 42\n", - " 42\n", - " 42\n", - " 42\n", + " 38\n", + " 38\n", + " 38\n", + " 38\n", + " 38\n", + " 38\n", " 15\n", - " 42\n", - " 41\n", + " 38\n", + " 38\n", " \n", " \n", " hyundai\n", - " 50\n", - " 50\n", - " 50\n", - " 50\n", - " 50\n", - " 50\n", - " 17\n", - " 50\n", - " 49\n", + " 48\n", + " 48\n", + " 
48\n", + " 48\n", + " 48\n", + " 48\n", + " 16\n", + " 48\n", + " 47\n", " \n", " \n", " infiniti\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 11\n", - " 44\n", - " 44\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 13\n", + " 43\n", + " 41\n", " \n", " \n", " isuzu\n", - " 38\n", - " 38\n", - " 38\n", - " 38\n", - " 38\n", - " 38\n", - " 15\n", - " 38\n", - " 38\n", + " 42\n", + " 42\n", + " 42\n", + " 42\n", + " 42\n", + " 42\n", + " 16\n", + " 42\n", + " 42\n", " \n", " \n", " jaguar\n", - " 54\n", - " 54\n", - " 54\n", - " 54\n", - " 54\n", - " 54\n", - " 15\n", - " 54\n", - " 50\n", + " 55\n", + " 55\n", + " 55\n", + " 55\n", + " 55\n", + " 55\n", + " 13\n", + " 55\n", + " 48\n", " \n", " \n", " jeep\n", - " 56\n", - " 56\n", - " 56\n", - " 56\n", - " 56\n", - " 56\n", - " 18\n", - " 56\n", - " 55\n", + " 61\n", + " 61\n", + " 61\n", + " 61\n", + " 61\n", + " 61\n", + " 20\n", + " 61\n", + " 60\n", " \n", " \n", " kia\n", - " 45\n", - " 45\n", - " 45\n", - " 45\n", - " 45\n", - " 45\n", - " 15\n", - " 45\n", - " 44\n", + " 52\n", + " 52\n", + " 52\n", + " 52\n", + " 52\n", + " 52\n", + " 20\n", + " 52\n", + " 50\n", " \n", " \n", " lamborghini\n", - " 11\n", - " 11\n", - " 11\n", - " 11\n", - " 11\n", - " 11\n", - " 5\n", - " 11\n", - " 11\n", + " 20\n", + " 20\n", + " 20\n", + " 20\n", + " 20\n", + " 20\n", + " 7\n", + " 20\n", + " 16\n", " \n", " \n", " land-rover\n", - " 42\n", - " 42\n", - " 42\n", - " 42\n", - " 42\n", - " 42\n", - " 14\n", - " 42\n", - " 41\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 21\n", + " 43\n", + " 43\n", " \n", " \n", " lexus\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", + " 48\n", + " 49\n", + " 49\n", + " 49\n", + " 49\n", + " 49\n", " 15\n", - " 44\n", - " 43\n", + " 49\n", + " 46\n", " \n", " \n", " lincoln\n", - " 43\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 12\n", - " 44\n", - " 44\n", - " \n", - 
" \n", - " lotus\n", - " 22\n", - " 22\n", - " 22\n", - " 22\n", - " 22\n", - " 22\n", - " 10\n", - " 22\n", - " 20\n", - " \n", - " \n", - " maserati\n", " 39\n", " 39\n", " 39\n", " 39\n", " 39\n", " 39\n", - " 13\n", + " 16\n", " 39\n", - " 37\n", + " 36\n", + " \n", + " \n", + " lotus\n", + " 21\n", + " 21\n", + " 21\n", + " 21\n", + " 21\n", + " 21\n", + " 9\n", + " 21\n", + " 21\n", + " \n", + " \n", + " maserati\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 13\n", + " 40\n", + " 38\n", " \n", " \n", " maybach\n", - " 7\n", - " 7\n", - " 7\n", - " 7\n", - " 7\n", - " 7\n", " 3\n", - " 7\n", - " 6\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 2\n", + " 3\n", + " 3\n", " \n", " \n", " mazda\n", - " 35\n", - " 35\n", - " 35\n", - " 35\n", - " 35\n", - " 35\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", " 14\n", - " 35\n", - " 33\n", - " \n", - " \n", - " mclaren\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 43\n", + " 42\n", " \n", " \n", " mercedes-benz\n", - " 52\n", - " 52\n", - " 52\n", - " 52\n", - " 52\n", - " 52\n", - " 14\n", - " 52\n", - " 50\n", + " 47\n", + " 47\n", + " 47\n", + " 47\n", + " 47\n", + " 47\n", + " 19\n", + " 47\n", + " 45\n", " \n", " \n", " mercury\n", @@ -6411,141 +5941,141 @@ " 49\n", " 49\n", " 49\n", - " 13\n", + " 18\n", " 49\n", " 48\n", " \n", " \n", " mini\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 17\n", - " 44\n", - " 43\n", + " 38\n", + " 38\n", + " 38\n", + " 38\n", + " 38\n", + " 38\n", + " 15\n", + " 38\n", + " 33\n", " \n", " \n", " mitsubishi\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 44\n", - " 12\n", - " 44\n", - " 40\n", + " 53\n", + " 53\n", + " 53\n", + " 53\n", + " 53\n", + " 53\n", + " 18\n", + " 53\n", + " 51\n", " \n", " \n", " nissan\n", - " 55\n", - " 55\n", - " 55\n", - " 55\n", - " 55\n", - " 55\n", - " 17\n", - " 55\n", - " 55\n", + " 43\n", 
+ " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 18\n", + " 43\n", + " 43\n", " \n", " \n", " pontiac\n", - " 46\n", - " 46\n", - " 46\n", - " 46\n", - " 46\n", - " 46\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", " 14\n", - " 46\n", - " 45\n", + " 40\n", + " 39\n", " \n", " \n", " porsche\n", - " 41\n", - " 41\n", - " 41\n", - " 41\n", - " 41\n", - " 41\n", - " 12\n", - " 41\n", - " 39\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 40\n", + " 10\n", + " 40\n", + " 40\n", " \n", " \n", " ram\n", - " 55\n", - " 55\n", - " 55\n", - " 55\n", - " 55\n", - " 55\n", + " 34\n", + " 36\n", + " 36\n", + " 36\n", + " 36\n", + " 36\n", " 10\n", - " 55\n", - " 53\n", + " 36\n", + " 36\n", " \n", " \n", " rolls-royce\n", - " 8\n", - " 8\n", - " 8\n", - " 8\n", - " 8\n", - " 8\n", - " 5\n", - " 8\n", - " 8\n", + " 4\n", + " 4\n", + " 4\n", + " 4\n", + " 4\n", + " 4\n", + " 3\n", + " 4\n", + " 4\n", " \n", " \n", " subaru\n", + " 52\n", + " 52\n", + " 52\n", + " 52\n", + " 52\n", + " 52\n", + " 16\n", + " 52\n", + " 48\n", + " \n", + " \n", + " suzuki\n", " 42\n", " 42\n", " 42\n", " 42\n", " 42\n", " 42\n", - " 15\n", + " 16\n", " 42\n", - " 40\n", - " \n", - " \n", - " suzuki\n", - " 41\n", - " 41\n", - " 41\n", - " 41\n", - " 41\n", - " 41\n", - " 18\n", - " 41\n", " 41\n", " \n", " \n", " tesla\n", - " 34\n", - " 34\n", - " 34\n", - " 34\n", - " 34\n", - " 34\n", + " 31\n", + " 31\n", + " 31\n", + " 31\n", + " 31\n", + " 31\n", " 5\n", - " 34\n", - " 34\n", + " 31\n", + " 31\n", " \n", " \n", " volvo\n", - " 47\n", - " 47\n", - " 47\n", - " 47\n", - " 47\n", - " 47\n", - " 17\n", - " 47\n", - " 47\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 43\n", + " 15\n", + " 43\n", + " 42\n", " \n", " \n", "\n", @@ -6554,162 +6084,159 @@ "text/plain": [ " sentiment.score emotion.sadness emotion.joy emotion.fear \\\n", "Car_Make \n", - "AMGeneral 2 2 2 2 \n", - "Acura 59 59 59 59 \n", - "AlfaRomeo 18 18 18 18 \n", - 
"AstonMartin 23 23 23 23 \n", - "Audi 53 53 53 53 \n", - "BMW 45 45 45 45 \n", - "Bentley 37 37 37 37 \n", + "AMGeneral 3 3 3 3 \n", + "Acura 42 42 42 42 \n", + "AlfaRomeo 16 16 16 16 \n", + "AstonMartin 33 33 33 33 \n", + "Audi 51 51 51 51 \n", + "BMW 48 48 48 48 \n", + "Bentley 36 36 36 36 \n", "Bugatti 1 1 1 1 \n", - "Buick 59 59 59 59 \n", - "Cadillac 40 40 40 40 \n", - "Chevrolet 40 40 40 40 \n", - "GMC 43 43 43 43 \n", - "Honda 46 46 46 46 \n", + "Buick 53 53 53 53 \n", + "Cadillac 49 49 49 49 \n", + "Chevrolet 37 37 37 37 \n", + "GMC 57 57 57 57 \n", + "Honda 44 44 44 44 \n", "Toyota 46 46 46 46 \n", - "Volkswagen 46 46 46 46 \n", - "chrysler 49 49 49 49 \n", - "dodge 41 41 41 41 \n", - "ferrari 19 19 19 19 \n", - "fiat 50 50 50 50 \n", - "ford 49 49 49 49 \n", - "genesis 13 13 13 13 \n", - "hummer 42 42 42 42 \n", - "hyundai 50 50 50 50 \n", - "infiniti 44 44 44 44 \n", - "isuzu 38 38 38 38 \n", - "jaguar 54 54 54 54 \n", - "jeep 56 56 56 56 \n", - "kia 45 45 45 45 \n", - "lamborghini 11 11 11 11 \n", - "land-rover 42 42 42 42 \n", - "lexus 44 44 44 44 \n", - "lincoln 43 44 44 44 \n", - "lotus 22 22 22 22 \n", - "maserati 39 39 39 39 \n", - "maybach 7 7 7 7 \n", - "mazda 35 35 35 35 \n", - "mclaren 1 1 1 1 \n", - "mercedes-benz 52 52 52 52 \n", + "Volkswagen 40 40 40 40 \n", + "chrysler 46 47 47 47 \n", + "dodge 42 42 42 42 \n", + "ferrari 29 29 29 29 \n", + "fiat 49 49 49 49 \n", + "ford 53 53 53 53 \n", + "genesis 23 23 23 23 \n", + "hummer 38 38 38 38 \n", + "hyundai 48 48 48 48 \n", + "infiniti 43 43 43 43 \n", + "isuzu 42 42 42 42 \n", + "jaguar 55 55 55 55 \n", + "jeep 61 61 61 61 \n", + "kia 52 52 52 52 \n", + "lamborghini 20 20 20 20 \n", + "land-rover 43 43 43 43 \n", + "lexus 48 49 49 49 \n", + "lincoln 39 39 39 39 \n", + "lotus 21 21 21 21 \n", + "maserati 40 40 40 40 \n", + "maybach 3 3 3 3 \n", + "mazda 43 43 43 43 \n", + "mercedes-benz 47 47 47 47 \n", "mercury 49 49 49 49 \n", - "mini 44 44 44 44 \n", - "mitsubishi 44 44 44 44 \n", - "nissan 
55 55 55 55 \n", - "pontiac 46 46 46 46 \n", - "porsche 41 41 41 41 \n", - "ram 55 55 55 55 \n", - "rolls-royce 8 8 8 8 \n", - "subaru 42 42 42 42 \n", - "suzuki 41 41 41 41 \n", - "tesla 34 34 34 34 \n", - "volvo 47 47 47 47 \n", + "mini 38 38 38 38 \n", + "mitsubishi 53 53 53 53 \n", + "nissan 43 43 43 43 \n", + "pontiac 40 40 40 40 \n", + "porsche 40 40 40 40 \n", + "ram 34 36 36 36 \n", + "rolls-royce 4 4 4 4 \n", + "subaru 52 52 52 52 \n", + "suzuki 42 42 42 42 \n", + "tesla 31 31 31 31 \n", + "volvo 43 43 43 43 \n", "\n", " emotion.disgust emotion.anger Rating\\r Review_Content \\\n", "Car_Make \n", - "AMGeneral 2 2 2 2 \n", - "Acura 59 59 21 59 \n", - "AlfaRomeo 18 18 4 18 \n", - "AstonMartin 23 23 12 23 \n", - "Audi 53 53 16 53 \n", - "BMW 45 45 12 45 \n", - "Bentley 37 37 13 37 \n", + "AMGeneral 3 3 2 3 \n", + "Acura 42 42 12 42 \n", + "AlfaRomeo 16 16 4 16 \n", + "AstonMartin 33 33 10 33 \n", + "Audi 51 51 15 51 \n", + "BMW 48 48 16 48 \n", + "Bentley 36 36 14 36 \n", "Bugatti 1 1 1 1 \n", - "Buick 59 59 18 59 \n", - "Cadillac 40 40 16 40 \n", - "Chevrolet 40 40 15 40 \n", - "GMC 43 43 17 43 \n", - "Honda 46 46 17 46 \n", - "Toyota 46 46 16 46 \n", - "Volkswagen 46 46 18 46 \n", - "chrysler 49 49 21 49 \n", - "dodge 41 41 15 41 \n", - "ferrari 19 19 7 19 \n", - "fiat 50 50 11 50 \n", - "ford 49 49 18 49 \n", - "genesis 13 13 4 13 \n", - "hummer 42 42 15 42 \n", - "hyundai 50 50 17 50 \n", - "infiniti 44 44 11 44 \n", - "isuzu 38 38 15 38 \n", - "jaguar 54 54 15 54 \n", - "jeep 56 56 18 56 \n", - "kia 45 45 15 45 \n", - "lamborghini 11 11 5 11 \n", - "land-rover 42 42 14 42 \n", - "lexus 44 44 15 44 \n", - "lincoln 44 44 12 44 \n", - "lotus 22 22 10 22 \n", - "maserati 39 39 13 39 \n", - "maybach 7 7 3 7 \n", - "mazda 35 35 14 35 \n", - "mclaren 1 1 1 1 \n", - "mercedes-benz 52 52 14 52 \n", - "mercury 49 49 13 49 \n", - "mini 44 44 17 44 \n", - "mitsubishi 44 44 12 44 \n", - "nissan 55 55 17 55 \n", - "pontiac 46 46 14 46 \n", - "porsche 41 41 12 41 \n", 
- "ram 55 55 10 55 \n", - "rolls-royce 8 8 5 8 \n", - "subaru 42 42 15 42 \n", - "suzuki 41 41 18 41 \n", - "tesla 34 34 5 34 \n", - "volvo 47 47 17 47 \n", + "Buick 53 53 19 53 \n", + "Cadillac 49 49 15 49 \n", + "Chevrolet 37 37 17 37 \n", + "GMC 57 57 21 57 \n", + "Honda 44 44 18 44 \n", + "Toyota 46 46 11 46 \n", + "Volkswagen 40 40 15 40 \n", + "chrysler 47 47 19 47 \n", + "dodge 42 42 16 42 \n", + "ferrari 29 29 7 29 \n", + "fiat 49 49 11 49 \n", + "ford 53 53 18 53 \n", + "genesis 23 23 3 23 \n", + "hummer 38 38 15 38 \n", + "hyundai 48 48 16 48 \n", + "infiniti 43 43 13 43 \n", + "isuzu 42 42 16 42 \n", + "jaguar 55 55 13 55 \n", + "jeep 61 61 20 61 \n", + "kia 52 52 20 52 \n", + "lamborghini 20 20 7 20 \n", + "land-rover 43 43 21 43 \n", + "lexus 49 49 15 49 \n", + "lincoln 39 39 16 39 \n", + "lotus 21 21 9 21 \n", + "maserati 40 40 13 40 \n", + "maybach 3 3 2 3 \n", + "mazda 43 43 14 43 \n", + "mercedes-benz 47 47 19 47 \n", + "mercury 49 49 18 49 \n", + "mini 38 38 15 38 \n", + "mitsubishi 53 53 18 53 \n", + "nissan 43 43 18 43 \n", + "pontiac 40 40 14 40 \n", + "porsche 40 40 10 40 \n", + "ram 36 36 10 36 \n", + "rolls-royce 4 4 3 4 \n", + "subaru 52 52 16 52 \n", + "suzuki 42 42 16 42 \n", + "tesla 31 31 5 31 \n", + "volvo 43 43 15 43 \n", "\n", " Predicted_Y \n", "Car_Make \n", - "AMGeneral 2 \n", - "Acura 58 \n", - "AlfaRomeo 18 \n", - "AstonMartin 23 \n", - "Audi 52 \n", - "BMW 45 \n", - "Bentley 37 \n", + "AMGeneral 3 \n", + "Acura 41 \n", + "AlfaRomeo 16 \n", + "AstonMartin 32 \n", + "Audi 47 \n", + "BMW 44 \n", + "Bentley 32 \n", "Bugatti 1 \n", - "Buick 55 \n", - "Cadillac 40 \n", - "Chevrolet 40 \n", - "GMC 43 \n", - "Honda 46 \n", - "Toyota 46 \n", - "Volkswagen 44 \n", - "chrysler 48 \n", - "dodge 41 \n", - "ferrari 19 \n", + "Buick 51 \n", + "Cadillac 47 \n", + "Chevrolet 37 \n", + "GMC 55 \n", + "Honda 42 \n", + "Toyota 43 \n", + "Volkswagen 36 \n", + "chrysler 47 \n", + "dodge 39 \n", + "ferrari 26 \n", "fiat 48 \n", - "ford 46 \n", - 
"genesis 13 \n", - "hummer 41 \n", - "hyundai 49 \n", - "infiniti 44 \n", - "isuzu 38 \n", - "jaguar 50 \n", - "jeep 55 \n", - "kia 44 \n", - "lamborghini 11 \n", - "land-rover 41 \n", - "lexus 43 \n", - "lincoln 44 \n", - "lotus 20 \n", - "maserati 37 \n", - "maybach 6 \n", - "mazda 33 \n", - "mclaren 1 \n", - "mercedes-benz 50 \n", + "ford 52 \n", + "genesis 22 \n", + "hummer 38 \n", + "hyundai 47 \n", + "infiniti 41 \n", + "isuzu 42 \n", + "jaguar 48 \n", + "jeep 60 \n", + "kia 50 \n", + "lamborghini 16 \n", + "land-rover 43 \n", + "lexus 46 \n", + "lincoln 36 \n", + "lotus 21 \n", + "maserati 38 \n", + "maybach 3 \n", + "mazda 42 \n", + "mercedes-benz 45 \n", "mercury 48 \n", - "mini 43 \n", - "mitsubishi 40 \n", - "nissan 55 \n", - "pontiac 45 \n", - "porsche 39 \n", - "ram 53 \n", - "rolls-royce 8 \n", - "subaru 40 \n", + "mini 33 \n", + "mitsubishi 51 \n", + "nissan 43 \n", + "pontiac 39 \n", + "porsche 40 \n", + "ram 36 \n", + "rolls-royce 4 \n", + "subaru 48 \n", "suzuki 41 \n", - "tesla 34 \n", - "volvo 47 " + "tesla 31 \n", + "volvo 42 " ] }, "execution_count": 54, @@ -6733,8 +6260,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "R-Squared = 0.8021197630035817\n", - "Mean Squared Error = 0.016904870283648715\n" + "R-Squared = 0.5833818585164156\n", + "Mean Squared Error = 0.03221873091669909\n" ] } ], @@ -6802,253 +6329,248 @@ " \n", " \n", " AMGeneral\n", - " 3.000000\n", - " 3.194602\n", + " 4.833333\n", + " 4.264005\n", " \n", " \n", " Acura\n", - " 4.209746\n", - " 4.227164\n", + " 4.538690\n", + " 4.455716\n", " \n", " \n", " AlfaRomeo\n", - " 4.388889\n", - " 4.322972\n", + " 4.187500\n", + " 4.435494\n", " \n", " \n", " AstonMartin\n", - " 4.326087\n", - " 4.448320\n", + " 4.613636\n", + " 4.631018\n", " \n", " \n", " Audi\n", - " 4.445755\n", - " 4.365891\n", + " 4.453431\n", + " 4.421119\n", " \n", " \n", " BMW\n", - " 4.513889\n", - " 4.293852\n", + " 4.468750\n", + " 4.354955\n", " \n", " \n", " Bentley\n", - " 4.229730\n", - " 
4.410651\n", + " 4.239583\n", + " 4.57587\n", " \n", " \n", " Bugatti\n", - " 4.250000\n", - " 4.382092\n", + " 4.750000\n", + " 4.718716\n", " \n", " \n", " Buick\n", - " 4.292373\n", - " 4.254791\n", + " 4.162736\n", + " 4.075432\n", " \n", " \n", " Cadillac\n", - " 4.112500\n", - " 4.160872\n", + " 4.395408\n", + " 4.335975\n", " \n", " \n", " Chevrolet\n", - " 4.103125\n", - " 3.971316\n", + " 4.104730\n", + " 4.23044\n", " \n", " \n", " GMC\n", - " 4.084302\n", - " 4.206823\n", + " 4.089912\n", + " 4.138499\n", " \n", " \n", " Honda\n", - " 4.255435\n", - " 4.059390\n", + " 3.832386\n", + " 4.130003\n", " \n", " \n", " Toyota\n", - " 4.222826\n", - " 4.311795\n", + " 4.350543\n", + " 4.243079\n", " \n", " \n", " Volkswagen\n", - " 4.043478\n", - " 3.980834\n", + " 4.396875\n", + " 4.17746\n", " \n", " \n", " chrysler\n", - " 3.994898\n", - " 4.036067\n", + " 4.140957\n", + " 4.168451\n", " \n", " \n", " dodge\n", - " 4.259146\n", - " 4.250613\n", + " 4.133929\n", + " 4.163826\n", " \n", " \n", " ferrari\n", - " 4.684211\n", - " 4.588468\n", + " 4.767241\n", + " 4.530158\n", " \n", " \n", " fiat\n", - " 3.957500\n", - " 4.083914\n", + " 3.818878\n", + " 4.134359\n", " \n", " \n", " ford\n", - " 4.267857\n", - " 4.332619\n", + " 4.040094\n", + " 4.005134\n", " \n", " \n", " genesis\n", - " 4.076923\n", - " 4.263187\n", + " 4.608696\n", + " 4.316763\n", " \n", " \n", " hummer\n", - " 4.321429\n", - " 4.332991\n", + " 4.404605\n", + " 4.462752\n", " \n", " \n", " hyundai\n", - " 4.082500\n", - " 4.064445\n", + " 4.109375\n", + " 4.14899\n", " \n", " \n", " infiniti\n", - " 4.553977\n", - " 4.423686\n", + " 4.566860\n", + " 4.393907\n", " \n", " \n", " isuzu\n", - " 4.309211\n", - " 4.228860\n", + " 4.220238\n", + " 4.306578\n", " \n", " \n", " jaguar\n", - " 4.439815\n", - " 4.317453\n", + " 4.584091\n", + " 4.497573\n", " \n", " \n", " jeep\n", - " 3.899554\n", - " 4.093507\n", + " 4.108607\n", + " 4.15378\n", " \n", " \n", " kia\n", - " 4.219444\n", - " 
4.330955\n", + " 4.141827\n", + " 4.12064\n", " \n", " \n", " lamborghini\n", - " 4.613636\n", - " 4.633120\n", + " 4.725000\n", + " 4.665769\n", " \n", " \n", " land-rover\n", - " 4.154762\n", - " 4.383457\n", + " 3.848837\n", + " 4.0177\n", " \n", " \n", " lexus\n", - " 4.349432\n", - " 4.253620\n", + " 4.306122\n", + " 4.294433\n", " \n", " \n", " lincoln\n", - " 4.494318\n", - " 4.392293\n", + " 4.269231\n", + " 4.264042\n", " \n", " \n", " lotus\n", - " 4.590909\n", - " 4.478621\n", + " 4.702381\n", + " 4.490183\n", " \n", " \n", " maserati\n", - " 4.439103\n", - " 4.319154\n", + " 4.431250\n", + " 4.394369\n", " \n", " \n", " maybach\n", - " 4.732143\n", - " 4.710683\n", + " 4.958333\n", + " 4.733771\n", " \n", " \n", " mazda\n", - " 4.346429\n", - " 4.247244\n", - " \n", - " \n", - " mclaren\n", - " 5.000000\n", - " 4.694830\n", + " 4.479651\n", + " 4.318612\n", " \n", " \n", " mercedes-benz\n", - " 4.500000\n", - " 4.275059\n", + " 4.095745\n", + " 4.094125\n", " \n", " \n", " mercury\n", - " 4.466837\n", - " 4.308947\n", + " 4.311224\n", + " 4.390451\n", " \n", " \n", " mini\n", - " 4.161932\n", - " 4.272955\n", + " 4.036184\n", + " 4.190949\n", " \n", " \n", " mitsubishi\n", - " 4.517045\n", - " 4.431931\n", + " 4.346698\n", + " 4.417798\n", " \n", " \n", " nissan\n", - " 3.986364\n", - " 4.001050\n", + " 4.247093\n", + " 4.119348\n", " \n", " \n", " pontiac\n", - " 4.366848\n", - " 4.285907\n", + " 4.375000\n", + " 4.221752\n", " \n", " \n", " porsche\n", - " 4.500000\n", - " 4.463378\n", + " 4.662500\n", + " 4.552274\n", " \n", " \n", " ram\n", - " 3.731818\n", - " 4.006690\n", + " 3.861111\n", + " 4.113366\n", " \n", " \n", " rolls-royce\n", - " 4.375000\n", - " 4.476755\n", + " 4.843750\n", + " 4.508778\n", " \n", " \n", " subaru\n", - " 4.104167\n", - " 4.261672\n", + " 4.257212\n", + " 4.327014\n", " \n", " \n", " suzuki\n", - " 4.210366\n", - " 4.307377\n", + " 4.235119\n", + " 4.255248\n", " \n", " \n", " tesla\n", - " 4.194853\n", - " 
4.303805\n", + " 4.673387\n", + " 4.284923\n", " \n", " \n", " volvo\n", - " 4.140957\n", - " 4.079470\n", + " 4.380814\n", + " 4.281185\n", " \n", " \n", "\n", @@ -7058,56 +6580,55 @@ " Rating\\r Predicted_Y\n", " mean mean\n", "Car_Make \n", - "AMGeneral 3.000000 3.194602\n", - "Acura 4.209746 4.227164\n", - "AlfaRomeo 4.388889 4.322972\n", - "AstonMartin 4.326087 4.448320\n", - "Audi 4.445755 4.365891\n", - "BMW 4.513889 4.293852\n", - "Bentley 4.229730 4.410651\n", - "Bugatti 4.250000 4.382092\n", - "Buick 4.292373 4.254791\n", - "Cadillac 4.112500 4.160872\n", - "Chevrolet 4.103125 3.971316\n", - "GMC 4.084302 4.206823\n", - "Honda 4.255435 4.059390\n", - "Toyota 4.222826 4.311795\n", - "Volkswagen 4.043478 3.980834\n", - "chrysler 3.994898 4.036067\n", - "dodge 4.259146 4.250613\n", - "ferrari 4.684211 4.588468\n", - "fiat 3.957500 4.083914\n", - "ford 4.267857 4.332619\n", - "genesis 4.076923 4.263187\n", - "hummer 4.321429 4.332991\n", - "hyundai 4.082500 4.064445\n", - "infiniti 4.553977 4.423686\n", - "isuzu 4.309211 4.228860\n", - "jaguar 4.439815 4.317453\n", - "jeep 3.899554 4.093507\n", - "kia 4.219444 4.330955\n", - "lamborghini 4.613636 4.633120\n", - "land-rover 4.154762 4.383457\n", - "lexus 4.349432 4.253620\n", - "lincoln 4.494318 4.392293\n", - "lotus 4.590909 4.478621\n", - "maserati 4.439103 4.319154\n", - "maybach 4.732143 4.710683\n", - "mazda 4.346429 4.247244\n", - "mclaren 5.000000 4.694830\n", - "mercedes-benz 4.500000 4.275059\n", - "mercury 4.466837 4.308947\n", - "mini 4.161932 4.272955\n", - "mitsubishi 4.517045 4.431931\n", - "nissan 3.986364 4.001050\n", - "pontiac 4.366848 4.285907\n", - "porsche 4.500000 4.463378\n", - "ram 3.731818 4.006690\n", - "rolls-royce 4.375000 4.476755\n", - "subaru 4.104167 4.261672\n", - "suzuki 4.210366 4.307377\n", - "tesla 4.194853 4.303805\n", - "volvo 4.140957 4.079470" + "AMGeneral 4.833333 4.264005\n", + "Acura 4.538690 4.455716\n", + "AlfaRomeo 4.187500 4.435494\n", + "AstonMartin 4.613636 
4.631018\n", + "Audi 4.453431 4.421119\n", + "BMW 4.468750 4.354955\n", + "Bentley 4.239583 4.57587\n", + "Bugatti 4.750000 4.718716\n", + "Buick 4.162736 4.075432\n", + "Cadillac 4.395408 4.335975\n", + "Chevrolet 4.104730 4.23044\n", + "GMC 4.089912 4.138499\n", + "Honda 3.832386 4.130003\n", + "Toyota 4.350543 4.243079\n", + "Volkswagen 4.396875 4.17746\n", + "chrysler 4.140957 4.168451\n", + "dodge 4.133929 4.163826\n", + "ferrari 4.767241 4.530158\n", + "fiat 3.818878 4.134359\n", + "ford 4.040094 4.005134\n", + "genesis 4.608696 4.316763\n", + "hummer 4.404605 4.462752\n", + "hyundai 4.109375 4.14899\n", + "infiniti 4.566860 4.393907\n", + "isuzu 4.220238 4.306578\n", + "jaguar 4.584091 4.497573\n", + "jeep 4.108607 4.15378\n", + "kia 4.141827 4.12064\n", + "lamborghini 4.725000 4.665769\n", + "land-rover 3.848837 4.0177\n", + "lexus 4.306122 4.294433\n", + "lincoln 4.269231 4.264042\n", + "lotus 4.702381 4.490183\n", + "maserati 4.431250 4.394369\n", + "maybach 4.958333 4.733771\n", + "mazda 4.479651 4.318612\n", + "mercedes-benz 4.095745 4.094125\n", + "mercury 4.311224 4.390451\n", + "mini 4.036184 4.190949\n", + "mitsubishi 4.346698 4.417798\n", + "nissan 4.247093 4.119348\n", + "pontiac 4.375000 4.221752\n", + "porsche 4.662500 4.552274\n", + "ram 3.861111 4.113366\n", + "rolls-royce 4.843750 4.508778\n", + "subaru 4.257212 4.327014\n", + "suzuki 4.235119 4.255248\n", + "tesla 4.673387 4.284923\n", + "volvo 4.380814 4.281185" ] }, "execution_count": 56, @@ -7122,25 +6643,53 @@ { "cell_type": "code", "execution_count": 57, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "emotion.sadness mean float64\n", + "emotion.joy mean float64\n", + "emotion.fear mean float64\n", + "emotion.disgust mean float64\n", + "emotion.anger mean float64\n", + "sentiment.score mean float64\n", + "Rating\\r mean float64\n", + "Predicted_Y mean object\n", + "dtype: object" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "agg_grouped_test_set.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 58, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "y = 0.71x + 1.24\n" + "y = 0.51x + 2.10\n" ] }, { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -7153,7 +6702,8 @@ "pylab.ylabel('Rating Predicted By Model')\n", "\n", "# calc the trendline\n", - "z = np.polyfit(np.squeeze(agg_grouped_test_set['Rating\\r']), np.squeeze(agg_grouped_test_set['Predicted_Y']), 1)\n", + "z = np.polyfit(np.squeeze(agg_grouped_test_set['Rating\\r']),\n", + " np.squeeze(agg_grouped_test_set['Predicted_Y'].astype(float)), 1)\n", "p = np.poly1d(z)\n", "pylab.plot(agg_grouped_test_set['Predicted_Y'],p(agg_grouped_test_set['Predicted_Y']),\"r--\")\n", "pylab.title(\"Rating From Dataset Vs Rating Predicted By Model\")\n", @@ -7196,7 +6746,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.8.17" } }, "nbformat": 4, diff --git a/notebooks/Text_Extensions_for_Pandas_Overview.ipynb b/notebooks/Text_Extensions_for_Pandas_Overview.ipynb index 076aa5a8..20b9db6a 100644 --- a/notebooks/Text_Extensions_for_Pandas_Overview.ipynb +++ b/notebooks/Text_Extensions_for_Pandas_Overview.ipynb @@ -298,7 +298,7 @@ "
Announcements
IBM Study: Majority of Surveyed Companies are Not Prepared for IT Needs of the Future, Say U.S. and U.K. Tech Leaders
- Nearly a quarter of CIOs and CTOs surveyed say they are just starting their IT modernization journey or have yet to begin modernizing.
- To meet these needs, nearly 80 percent of leaders surveyed say there will be a higher reliance on partners that can provide managed infrastructure services.

ARMONK, N.Y., Jan. 4, 2021 /PRNewswire/ -- Many corporate IT leaders say their organizations are not prepared for the future IT needs of the business and nearly all are moving to advance their transition to cloud infrastructure, according to a new IBM (NYSE: IBM) survey of leaders at mid-sized and large companies in the United States and United Kingdom.

\"IBM

Of the 380 CIOs and CTOs who participated in the survey, 60% say their company's IT modernization program is not yet ready for the future, according to the recently completed The State of IT Transformation Study conducted by the Managed Infrastructure Services unit of IBM's Global Technology Services division. Nearly a quarter of CIOs and CTOs (24%) surveyed say their company is just starting its IT modernization journey or has yet to begin modernizing, with about a third surveyed saying they are still in the [...]" + " IBM Study: Majority of Surveyed Companies are Not Prepared for IT Needs of the Future, Say U.S. and U.K. Tech Leaders - Jan 4, 2021

IBM Newsroom

IBM Study: Majority of Surveyed Companies are Not Prepared for IT Needs of the Future, Say U.S. and U.K. Tech Leaders
- Nearly a quarter of CIOs and CTOs surveyed say they are just starting their IT modernization journey or have yet to begin modernizing.
- To meet these needs, nearly 80 percent of leaders surveyed say there will be a higher reliance on partners that can provide managed infrastructure services.

ARMONK, N.Y., Jan. 4, 2021 /PRNewswire/ -- Many corporate IT leaders say their organizations are not prepared for the future IT needs of the business and nearly all are moving to advance their transition to cloud infrastructure, according to a new IBM (NYSE: IBM) survey of leaders at mid-sized and large companies in the United States and United Kingdom.

\"IBM

Of the 380 CIOs and CTOs who participated in the survey, 60% say their company's IT modernization program is not yet ready for the future, according to the recently completed The State of IT Transformation Study conducted by the Managed Infrastructure Services unit of IBM's Global Technology Services division. Nearly a quarter of CIOs and CTOs (24%) surveyed say their company is just starting its IT modernization journey or has yet to begin modernizing, with about a third surveyed saying they are still in the midst of [...]" ], "text/plain": [ "" @@ -4774,7 +4774,7 @@ "output_type": "stream", "text": [ "\n", - "Dependency parsing took 0.4 sec.\n" + "Dependency parsing took 0.3 sec.\n" ] }, { @@ -4845,15 +4845,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "Extract entities and semantic roles: 218.7 sec before and 218.7 sec after\n", - " Identify persons quoted by name: 5.1 sec before and 5.1 sec after\n", - " Perform dependency parsing: 640.5 sec before and 73.3 sec after\n", - " Extract titles of persons: 11.0 sec before and 11.0 sec after\n", + "Extract entities and semantic roles: 530.9 sec before and 530.9 sec after\n", + " Identify persons quoted by name: 3.7 sec before and 3.7 sec after\n", + " Perform dependency parsing: 843.8 sec before and 71.4 sec after\n", + " Extract titles of persons: 8.8 sec before and 8.8 sec after\n", " Combine results across documents: 0.1 sec before and 0.1 sec after\n", "\n", "\n", - "Total time before: 875.3122136230469\n", - "Total time after: 308.0950550670624\n" + "Total time before: 1387.3444835777282\n", + "Total time after: 614.8777334327698\n" ] } ], @@ -4901,14 +4901,12 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ - "

" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -4977,7 +4975,7 @@ " step_3_results = mi.perform_dependency_parsing(step_1_results[\"analyzed_text\"],\n", " spacy_language_model)\n", " step_3_time = time.time()\n", - " \n", + "\n", " step_3a_results = mi.perform_targeted_dependency_parsing(step_2_results[\"person\"],\n", " spacy_language_model)\n", " step_3a_time = time.time()\n", @@ -5002,6 +5000,13 @@ " \n", " pd.DataFrame.from_records(timings).to_csv(\"ibm_press_release_timings.csv\")\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -5020,7 +5025,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.8.17" } }, "nbformat": 4, diff --git a/tutorials/market/SciPy_Demo_0.ipynb b/tutorials/market/SciPy_Demo_0.ipynb index 545e7838..149c731d 100644 --- a/tutorials/market/SciPy_Demo_0.ipynb +++ b/tutorials/market/SciPy_Demo_0.ipynb @@ -23,6 +23,7 @@ "from typing import *\n", "import json\n", "import os\n", + "import shutil\n", "import ibm_watson\n", "import ibm_watson.natural_language_understanding_v1 as nlu\n", "import ibm_cloud_sdk_core\n", @@ -120,30 +121,22 @@ "text/plain": [ "[{'type': 'Person',\n", " 'text': 'Christoph Herman',\n", - " 'relevance': 0.217154,\n", + " 'relevance': 0.357379,\n", " 'mentions': [{'text': 'Christoph Herman',\n", " 'location': [1213, 1229],\n", - " 'confidence': 0.94435}],\n", + " 'confidence': 0.992954}],\n", " 'count': 1,\n", - " 'confidence': 0.94435},\n", + " 'confidence': 0.992954},\n", " {'type': 'Person',\n", " 'text': 'Stephen Leonard',\n", - " 'relevance': 0.136166,\n", + " 'relevance': 0.225795,\n", " 'mentions': [{'text': 'Stephen Leonard',\n", " 'location': [2227, 2242],\n", - " 'confidence': 0.989177}],\n", + " 'confidence': 0.99548}],\n", " 'disambiguation': {'name': 'Steve_Leonard',\n", " 
'dbpedia_resource': 'http://dbpedia.org/resource/Steve_Leonard'},\n", " 'count': 1,\n", - " 'confidence': 0.989177},\n", - " {'type': 'Person',\n", - " 'text': 'Sam Ponedal',\n", - " 'relevance': 0.020711,\n", - " 'mentions': [{'text': 'Sam Ponedal',\n", - " 'location': [3574, 3585],\n", - " 'confidence': 0.894298}],\n", - " 'count': 1,\n", - " 'confidence': 0.894298}]" + " 'confidence': 0.99548}]" ] }, "execution_count": 5, @@ -191,19 +184,14 @@ " \n", " \n", " \n", - " 38\n", + " 26\n", " [1213, 1229): 'Christoph Herman'\n", - " 0.944350\n", + " 0.992954\n", " \n", " \n", - " 41\n", + " 31\n", " [2227, 2242): 'Stephen Leonard'\n", - " 0.989177\n", - " \n", - " \n", - " 48\n", - " [3574, 3585): 'Sam Ponedal'\n", - " 0.894298\n", + " 0.995480\n", " \n", " \n", "\n", @@ -211,9 +199,8 @@ ], "text/plain": [ " person confidence\n", - "38 [1213, 1229): 'Christoph Herman' 0.944350\n", - "41 [2227, 2242): 'Stephen Leonard' 0.989177\n", - "48 [3574, 3585): 'Sam Ponedal' 0.894298" + "26 [1213, 1229): 'Christoph Herman' 0.992954\n", + "31 [2227, 2242): 'Stephen Leonard' 0.995480" ] }, "execution_count": 6, @@ -271,9 +258,9 @@ " \n", " \n", " 1\n", - " [2227, 2519): 'Stephen Leonard, General Manage...\n", + " [2227, 2282): 'Stephen Leonard, General Manage...\n", " said\n", - " [2028, 2219): 'In June, IBM announced the avai...\n", + " [2352, 2519): ', we're giving our clients more...\n", " \n", " \n", "\n", @@ -282,11 +269,11 @@ "text/plain": [ " subject verb \\\n", "0 [1213, 1281): 'Christoph Herman, SVP and Head ... said \n", - "1 [2227, 2519): 'Stephen Leonard, General Manage... said \n", + "1 [2227, 2282): 'Stephen Leonard, General Manage... said \n", "\n", " object \n", "0 [937, 1205): 'SAP HANA Enterprise Cloud on IBM... \n", - "1 [2028, 2219): 'In June, IBM announced the avai... " + "1 [2352, 2519): ', we're giving our clients more... 
" ] }, "execution_count": 7, @@ -327,17 +314,17 @@ "[{'subject': {'text': 'Christoph Herman, SVP and Head of SAP HANA Enterprise Cloud Delivery',\n", " 'begin': 1213,\n", " 'end': 1281},\n", - " 'sentence': ' \"SAP HANA Enterprise Cloud on IBM Power Systems will help clients unlock the full value of SAP HANA in the cloud, with the possibility of enhancing the scalability and availability of mission critical SAP applications while moving workloads to SAP HANA and lowering TCO,\" said Christoph Herman, SVP and Head of SAP HANA Enterprise Cloud Delivery.',\n", + " 'sentence': '\"SAP HANA Enterprise Cloud on IBM Power Systems will help clients unlock the full value of SAP HANA in the cloud, with the possibility of enhancing the scalability and availability of mission critical SAP applications while moving workloads to SAP HANA and lowering TCO,\" said Christoph Herman, SVP and Head of SAP HANA Enterprise Cloud Delivery.',\n", " 'object': {'text': 'SAP HANA Enterprise Cloud on IBM Power Systems will help clients unlock the full value of SAP HANA in the cloud, with the possibility of enhancing the scalability and availability of mission critical SAP applications while moving workloads to SAP HANA and lowering TCO'},\n", - " 'action': {'verb': {'text': 'say', 'tense': 'past'},\n", + " 'action': {'verb': {'text': 'say', 'tense': 'future'},\n", " 'text': 'said',\n", " 'normalized': 'say'}},\n", - " {'subject': {'text': 'Stephen Leonard, General Manager, IBM Cognitive Systems, \"With the addition of IBM Power Systems in SAP HANA Enterprise Cloud, we\\'re giving our clients more choices and greater flexibility to run their workloads where they want to across the hybrid cloud and accelerate digital transformation',\n", + " {'subject': {'text': 'Stephen Leonard, General Manager, IBM Cognitive Systems',\n", " 'begin': 2227,\n", - " 'end': 2519},\n", - " 'sentence': ' \"In June, IBM announced the availability of POWER9 in the IBM Cloud, taking the first step toward our goal of 
bringing IBM Cognitive Systems technology to our clients, no matter where they are,\" said Stephen Leonard, General Manager, IBM Cognitive Systems, \"With the addition of IBM Power Systems in SAP HANA Enterprise Cloud, we\\'re giving our clients more choices and greater flexibility to run their workloads where they want to across the hybrid cloud and accelerate digital transformation.\"',\n", - " 'object': {'text': 'In June, IBM announced the availability of POWER9 in the IBM Cloud, taking the first step toward our goal of bringing IBM Cognitive Systems technology to our clients, no matter where they are'},\n", - " 'action': {'verb': {'text': 'say', 'tense': 'past'},\n", + " 'end': 2282},\n", + " 'sentence': '\"In June, IBM announced the availability of POWER9 in the IBM Cloud, taking the first step toward our goal of bringing IBM Cognitive Systems technology to our clients, no matter where they are,\" said Stephen Leonard, General Manager, IBM Cognitive Systems, \"With the addition of IBM Power Systems in SAP HANA Enterprise Cloud, we\\'re giving our clients more choices and greater flexibility to run their workloads where they want to across the hybrid cloud and accelerate digital transformation.\"',\n", + " 'object': {'text': \", we're giving our clients more choices and greater flexibility to run their workloads where they want to across the hybrid cloud and accelerate digital transformation\"},\n", + " 'action': {'verb': {'text': 'say', 'tense': 'present'},\n", " 'text': 'said',\n", " 'normalized': 'say'}}]" ] @@ -349,7 +336,6 @@ ], "source": [ "# Code for slides: Run the Watson NLU semantic_roles model\n", - "\n", "semantic_roles_results = (\n", " natural_language_understanding\n", " .analyze(url=doc_url, features=nlu.Features(\n", @@ -360,7 +346,7 @@ "for s in someone_said_something:\n", " s[\"subject\"][\"begin\"] = doc_text.find(s[\"subject\"][\"text\"])\n", " s[\"subject\"][\"end\"] = s[\"subject\"][\"begin\"] + len(s[\"subject\"][\"text\"])\n", - " \n", 
+ "\n", "\n", "someone_said_something_json = someone_said_something\n", "someone_said_something_json" @@ -371,11 +357,19 @@ "execution_count": 9, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).\n", + "Using a pipeline without specifying a model name and revision in production is not recommended.\n" + ] + }, { "data": { "text/plain": [ "[{'entity_group': 'PER',\n", - " 'score': 0.9996308088302612,\n", + " 'score': 0.99963087,\n", " 'word': 'Christoph Herman',\n", " 'start': 1213,\n", " 'end': 1229}]" @@ -464,6 +458,9 @@ "source": [ "# Write out all the data we've generated\n", "output_dir = \"./scipy_demo_data\"\n", + "if os.path.exists(output_dir):\n", + " shutil.rmtree(output_dir)\n", + "os.mkdir(output_dir)\n", "\n", "###################\n", "# Inputs to Part 1\n", @@ -472,15 +469,16 @@ "for m in person_mentions:\n", " m[\"start\"] = int(m[\"start\"])\n", " m[\"end\"] = int(m[\"end\"])\n", + " m[\"score\"] = float(m[\"score\"])\n", "with open(f\"{output_dir}/person_mentions.json\", \"w\") as f:\n", " json.dump(person_mentions, f)\n", - " \n", + "\n", "with open(f\"{output_dir}/person_mentions_watson.json\", \"w\") as f:\n", " json.dump(person_mentions_watson_json, f)\n", - " \n", + "\n", "with open(f\"{output_dir}/someone_said_something.json\", \"w\") as f:\n", " json.dump(someone_said_something_json, f)\n", - " \n", + "\n", "###################\n", "# Inputs to Part 2\n", "\n", @@ -499,7 +497,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -513,7 +511,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.8.17" } }, "nbformat": 4, diff --git 
a/tutorials/market/ibm_press_releases.txt b/tutorials/market/ibm_press_releases.txt index 453381d6..8383375d 100644 --- a/tutorials/market/ibm_press_releases.txt +++ b/tutorials/market/ibm_press_releases.txt @@ -47,7 +47,7 @@ https://newsroom.ibm.com/2020-05-21-we-trade-Digital-Trade-Finance-Network-stren https://newsroom.ibm.com/2020-07-30-IBM-and-Influential-Launch-AI-enabled-Social-Targeting-Solution-To-Help-Brands-Identify-Suitable-Influencers https://newsroom.ibm.com/2020-06-03-IBM-services-and-Aegon-sign-contract-of-portfolio-administration-for-800-000-individual-life-insurance-contracts https://newsroom.ibm.com/2020-06-03-IBM-and-Persistent-Systems-to-Accelerate-IBM-Cloud-Pak-Deployment-and-Core-IT-Modernization-for-Enterprises -https://newsroom.ibm.com/2020-06-04-Spains-CaixaBank-Teams-with-IBM-Services-to-Accelerate-Cloud-Transformation-and-Innovation-in-the-Financial-Services-Industry +#https://newsroom.ibm.com/2020-06-04-Spains-CaixaBank-Teams-with-IBM-Services-to-Accelerate-Cloud-Transformation-and-Innovation-in-the-Financial-Services-Industry https://newsroom.ibm.com/2020-06-04-IBM-Services-Collaborates-with-Lotte-Card-to-Adopt-A-Hybrid-Cloud-Strategy-To-Help-Transform-Core-Financial-Accounting-Systems https://newsroom.ibm.com/2020-06-04-Kvar-y-Arctic-Using-IBM-Blockchain-to-Trace-Norwegian-Farmed-Salmon-to-North-American-Stores https://newsroom.ibm.com/2020-06-04-Anaconda-and-IBM-Watson-Team-to-Simplify-Enterprise-Adoption-of-AI-Open-Source-Technologies @@ -116,7 +116,7 @@ https://newsroom.ibm.com/2020-09-18-Bloomberg-Television-Intelligence-Squared-US https://newsroom.ibm.com/2020-09-21-GEODIS-Uses-IBM-Sterling-Order-Management-to-Help-Retailers-Accelerate-Omnichannel-Customer-Experience-Capabilities-with-New-e-Commerce-Fulfillment https://newsroom.ibm.com/2020-09-22-IBM-Brings-Risk-Analytics-to-Security-Decision-Making 
https://newsroom.ibm.com/2020-09-23-IBM-Modernizes-Financial-Transaction-Solution-on-Red-Hat-OpenShift-to-Give-Banks-the-Flexibility-of-Hybrid-Cloud -https://newsroom.ibm.com/2020-09-28-2020-Call-for-Code-Global-Challenge-Finalists-Selected-for-Innovative-Solutions-to-Take-on-COVID-19-and-Climate-Change +#https://newsroom.ibm.com/2020-09-28-2020-Call-for-Code-Global-Challenge-Finalists-Selected-for-Innovative-Solutions-to-Take-on-COVID-19-and-Climate-Change https://newsroom.ibm.com/2020-09-30-IBM-Study-Majority-of-Global-C-Suite-Executives-are-Rapidly-Accelerating-Digital-Transformation-due-to-COVID-19-Pandemic-but-People-and-Talent-are-Key-to-Future-Progress https://newsroom.ibm.com/2020-09-30-BCI-in-Collaboration-with-IBM-Advances-Blockchain-based-Financial-Services-with-Electronic-Letter-of-Guarantee-for-Clients-in-Thailand diff --git a/tutorials/market/market_intelligence.py b/tutorials/market/market_intelligence.py index 5d1707ec..88f10373 100644 --- a/tutorials/market/market_intelligence.py +++ b/tutorials/market/market_intelligence.py @@ -31,9 +31,13 @@ def maybe_download_articles() -> pd.DataFrame: lines = [l.strip() for l in f.readlines()] article_urls = [l for l in lines if len(l) > 0 and l[0] != "#"] - article_htmls = [ - download_article(url) for url in article_urls - ] + article_htmls = [] + for url in article_urls: + try: + article_htmls.append(download_article(url)) + except urllib.error.HTTPError as e: + raise ValueError(f"Error downloading {url}") from e + to_write = pd.DataFrame({"url": article_urls, "html": article_htmls}) to_write.to_feather(file_name)