In [None]:
! pip install -q pyspark==3.1.2 spark-nlp


[K     |████████████████████████████████| 212.4 MB 62 kB/s 
[K     |████████████████████████████████| 142 kB 48.7 MB/s 
[K     |████████████████████████████████| 198 kB 14.1 MB/s 
[?25h  Building wheel for pyspark (setup.py) ... [?25l[?25hdone


In [None]:
import sparknlp

# Start a Spark session through Spark NLP using the default arguments.
spark = sparknlp.start()
# Optional params =>> gpu=False, spark23=False (spark23=True: start with Spark 2.3)


# Report the Spark NLP and Apache Spark versions in use.
print("Spark NLP version", sparknlp.version())
print("Apache Spark version:", spark.version)

Spark NLP version 3.4.2
Apache Spark version: 3.1.2


In [None]:
# List the spark-nlp_2.12 jar files in the local Ivy cache (~/.ivy2), newest first.
! cd ~/.ivy2/cache/com.johnsnowlabs.nlp/spark-nlp_2.12/jars && ls -lt


total 39272
-rw-r--r-- 1 root root 40210542 Mar  9 14:44 spark-nlp_2.12-3.4.2.jar


In [None]:
import os

# Install java
# Refresh the apt package index, then install a headless Java 8 JDK
# (the installer's stdout is discarded).
! apt-get update -qq
! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null

# Download the Spark 2.3.0 / Hadoop 2.7 binary distribution from the Apache archive.
!wget -q https://archive.apache.org/dist/spark/spark-2.3.0/spark-2.3.0-bin-hadoop2.7.tgz

# Unpack the archive into the current directory and install findspark.
!tar xf spark-2.3.0-bin-hadoop2.7.tgz
!pip install -q findspark

# Point this process at the Java 8 JDK and at the unpacked Spark distribution.
# NOTE(review): SPARK_HOME assumes the archive was unpacked under /content — confirm the working directory.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["PATH"] = os.environ["JAVA_HOME"] + "/bin:" + os.environ["PATH"]
os.environ["SPARK_HOME"] = "/content/spark-2.3.0-bin-hadoop2.7"
! java -version

# findspark.init() is called without arguments, so it presumably locates Spark via the
# SPARK_HOME variable set above — keep the environment setup before this call.
import findspark
findspark.init()
from pyspark.sql import SparkSession

# Install Spark NLP 2.7.5 on top of whatever is already installed.
# NOTE(review): sparknlp was already imported (and a session started) earlier in this notebook;
# in the same kernel this import likely returns the already-loaded module, so a kernel restart
# may be needed for the 2.7.5 install to take effect — verify.
! pip install --ignore-installed -q spark-nlp==2.7.5
import sparknlp

spark = sparknlp.start(spark23=True)

openjdk version "1.8.0_312"
OpenJDK Runtime Environment (build 1.8.0_312-8u312-b07-0ubuntu1~18.04-b07)
OpenJDK 64-Bit Server VM (build 25.312-b07, mixed mode)
[K     |████████████████████████████████| 139 kB 5.3 MB/s 
[?25h

In [None]:
from pyspark.sql import SparkSession

def start(gpu=False, spark23=False):
    """Build a local SparkSession configured with a Spark NLP 2.5.4 package.

    Args:
        gpu: when truthy, select the ``spark-nlp-gpu`` artifact.
        spark23: when truthy, select the ``-spark23`` variant of the artifact.

    Returns:
        The value returned by ``getOrCreate()`` on the configured builder.
    """
    current_version = "2.5.4"

    # Artifact id: "spark-nlp" plus the optional "-gpu" and "-spark23" suffixes,
    # always with the "_2.11" suffix.
    artifact = "spark-nlp"
    if gpu:
        artifact += "-gpu"
    if spark23:
        artifact += "-spark23"
    package = "com.johnsnowlabs.nlp:{}_2.11:{}".format(artifact, current_version)

    base_options = (
        ("spark.driver.memory", "16G"),
        ("spark.serializer", "org.apache.spark.serializer.KryoSerializer"),
        ("spark.kryoserializer.buffer.max", "1000M"),
        ("spark.driver.maxResultSize", "0"),
    )

    builder = SparkSession.builder.appName("Spark NLP").master("local[*]")
    for option_name, option_value in base_options:
        builder = builder.config(option_name, option_value)

    # The return value is intentionally not captured here: the builder is relied on
    # to record the option on itself.
    builder.config("spark.jars.packages", package)

    return builder.getOrCreate()

In [None]:
from sparknlp.pretrained import PretrainedPipeline

In [None]:
# Sample text used by the pipeline demos in this notebook. Several words are
# misspelled ("persn", "intersting", "brthers"), which gives the spell-checking
# stages something to correct.
testDoc = '''
Peter is a very good persn.
My life in Russia is very intersting.
John and Peter are brthers. However they don't support each other that much.
Lucas Dunbercker is no longer happy. He has a good car though.
Europe is very culture rich. There are huge churches! and big houses!
'''

In [None]:
# Load the pretrained English 'explain_document_ml' pipeline by name.
pipeline = PretrainedPipeline('explain_document_ml', lang='en')

explain_document_ml download started this may take some time.
Approx size to download 9.1 MB
[OK!]


In [None]:
# Show the stages that make up the loaded pipeline model.
pipeline.model.stages

[document_811d40a38b24,
 SENTENCE_ce56851acebe,
 REGEX_TOKENIZER_78daa3b4692f,
 SPELL_79c88338ef12,
 LEMMATIZER_c62ad8f355f9,
 STEMMER_caf11d1f4d0e,
 POS_dbb704204f6f]

In [None]:
# Load a pipeline from a directory on disk instead of by name.
# NOTE(review): the absolute path (including the version/timestamp suffix) is specific to one
# environment, and pipeline_local is not used in the visible part of the notebook — confirm it is needed.
pipeline_local = PretrainedPipeline.from_disk('/root/cache_pretrained/explain_document_ml_en_3.1.3_3.0_1632168876620')

In [None]:
%%time

# Annotate the sample text with the explain_document_ml pipeline (timed by %%time).
result = pipeline.annotate(testDoc)

CPU times: user 32.9 ms, sys: 7.55 ms, total: 40.5 ms
Wall time: 2.14 s


In [None]:
# Names of the outputs available in the annotation result.
result.keys()

dict_keys(['document', 'spell', 'pos', 'lemmas', 'token', 'stems', 'sentence'])

In [None]:
# Sentences detected in the sample text.
result['sentence']

['Peter is a very good persn.',
 'My life in Russia is very intersting.',
 'John and Peter are brthers.',
 "However they don't support each other that much.",
 'Lucas Dunbercker is no longer happy.',
 'He has a good car though.',
 'Europe is very culture rich.',
 'There are huge churches!',
 'and big houses!']

In [None]:
# Pair each token with its part-of-speech tag.
list(zip(result['token'], result['pos']))

[('Peter', 'NNP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('very', 'RB'),
 ('good', 'JJ'),
 ('persn', 'NN'),
 ('.', '.'),
 ('My', 'PRP$'),
 ('life', 'NN'),
 ('in', 'IN'),
 ('Russia', 'NNP'),
 ('is', 'VBZ'),
 ('very', 'RB'),
 ('intersting', 'JJ'),
 ('.', '.'),
 ('John', 'NNP'),
 ('and', 'CC'),
 ('Peter', 'NNP'),
 ('are', 'VBP'),
 ('brthers', 'NNS'),
 ('.', '.'),
 ('However', 'RB'),
 ('they', 'PRP'),
 ("don't", 'VBP'),
 ('support', 'VB'),
 ('each', 'DT'),
 ('other', 'JJ'),
 ('that', 'IN'),
 ('much', 'JJ'),
 ('.', '.'),
 ('Lucas', 'NNP'),
 ('Dunbercker', 'NNP'),
 ('is', 'VBZ'),
 ('no', 'DT'),
 ('longer', 'RB'),
 ('happy', 'JJ'),
 ('.', '.'),
 ('He', 'PRP'),
 ('has', 'VBZ'),
 ('a', 'DT'),
 ('good', 'JJ'),
 ('car', 'NN'),
 ('though', 'IN'),
 ('.', '.'),
 ('Europe', 'NNP'),
 ('is', 'VBZ'),
 ('very', 'RB'),
 ('culture', 'RB'),
 ('rich', 'JJ'),
 ('.', '.'),
 ('There', 'EX'),
 ('are', 'VBP'),
 ('huge', 'JJ'),
 ('churches', 'NNS'),
 ('!', '.'),
 ('and', 'CC'),
 ('big', 'JJ'),
 ('houses', 'NNS'),
 ('!', 

In [None]:
# Line up each token with its lemma, stem and spell-checked form.
list(zip(result['token'], result['lemmas'], result['stems'], result['spell']))

[('Peter', 'Peter', 'peter', 'Peter'),
 ('is', 'be', 'i', 'is'),
 ('a', 'a', 'a', 'a'),
 ('very', 'very', 'veri', 'very'),
 ('good', 'good', 'good', 'good'),
 ('persn', 'person', 'person', 'person'),
 ('.', '.', '.', '.'),
 ('My', 'My', 'my', 'My'),
 ('life', 'life', 'life', 'life'),
 ('in', 'in', 'in', 'in'),
 ('Russia', 'Russia', 'russia', 'Russia'),
 ('is', 'be', 'i', 'is'),
 ('very', 'very', 'veri', 'very'),
 ('intersting', 'interest', 'interest', 'interesting'),
 ('.', '.', '.', '.'),
 ('John', 'John', 'john', 'John'),
 ('and', 'and', 'and', 'and'),
 ('Peter', 'Peter', 'peter', 'Peter'),
 ('are', 'be', 'ar', 'are'),
 ('brthers', 'brother', 'brother', 'brothers'),
 ('.', '.', '.', '.'),
 ('However', 'However', 'howev', 'However'),
 ('they', 'they', 'thei', 'they'),
 ("don't", "don't", "don't", "don't"),
 ('support', 'support', 'support', 'support'),
 ('each', 'each', 'each', 'each'),
 ('other', 'other', 'other', 'other'),
 ('that', 'that', 'that', 'that'),
 ('much', 'much', 'much',

In [None]:
import pandas as pd
# Let pandas display up to 100 rows before truncating.
pd.set_option("display.max_rows", 100)

# Output column name -> key of the annotation result that fills it (order defines column order).
ml_columns = {
    'token': 'token',
    'corrected': 'spell',
    'POS': 'pos',
    'lemmas': 'lemmas',
    'stems': 'stems',
}
df = pd.DataFrame({column: result[key] for column, key in ml_columns.items()})
df

Unnamed: 0,token,corrected,POS,lemmas,stems
0,Peter,Peter,NNP,Peter,peter
1,is,is,VBZ,be,i
2,a,a,DT,a,a
3,very,very,RB,very,veri
4,good,good,JJ,good,good
5,persn,person,NN,person,person
6,.,.,.,.,.
7,My,My,PRP$,My,my
8,life,life,NN,life,life
9,in,in,IN,in,in


In [None]:
# Load the pretrained English 'explain_document_dl' pipeline.
pipeline_dl = PretrainedPipeline('explain_document_dl', lang='en')
# Only the final expression of a cell is displayed, so of the lookups below
# only the getClasses() result is shown.
pipeline_dl.model.stages
# stages[-2] is the second-to-last stage — presumably the NER model, given that
# getClasses() returns entity labels; verify against the stage list.
pipeline_dl.model.stages[-2].getStorageRef()
pipeline_dl.model.stages[-2].getClasses()

explain_document_dl download started this may take some time.
Approx size to download 169.4 MB
[OK!]


['O', 'B-ORG', 'B-LOC', 'B-PER', 'I-PER', 'I-ORG', 'B-MISC', 'I-LOC', 'I-MISC']

In [None]:
%%time

result = pipeline_dl.annotate(testDoc)

result.keys()
result.keys()
result['entities']

CPU times: user 45.1 ms, sys: 7.52 ms, total: 52.6 ms
Wall time: 1.71 s


In [None]:
# Output column name -> key of the explain_document_dl result that fills it
# (order defines column order).
dl_columns = {
    'token': 'token',
    'ner_label': 'ner',
    'spell_corrected': 'checked',
    'POS': 'pos',
    'lemmas': 'lemma',
    'stems': 'stem',
}
df = pd.DataFrame({column: result[key] for column, key in dl_columns.items()})

df


Unnamed: 0,token,ner_label,spell_corrected,POS,lemmas,stems
0,Peter,B-PER,Peter,NNP,Peter,peter
1,is,O,is,VBZ,be,i
2,a,O,a,DT,a,a
3,very,O,very,RB,very,veri
4,good,O,good,JJ,good,good
5,persn,O,person,NN,person,person
6,.,O,.,.,.,.
7,My,O,My,PRP$,My,my
8,life,O,life,NN,life,life
9,in,O,in,IN,in,in


In [None]:
# Load the pretrained English 'recognize_entities_dl' pipeline.
recognize_entities = PretrainedPipeline('recognize_entities_dl', lang='en')
# Only the final expression of the cell (the getClasses() result) is displayed.
recognize_entities.model.stages
# Stage indices 3 and 4 are hard-coded — presumably the embeddings stage and the NER
# model respectively; verify against the stage list.
recognize_entities.model.stages[3].getStorageRef()
recognize_entities.model.stages[4].getClasses()

recognize_entities_dl download started this may take some time.
Approx size to download 160.1 MB
[OK!]


['O', 'B-ORG', 'B-LOC', 'B-PER', 'I-PER', 'I-ORG', 'B-MISC', 'I-LOC', 'I-MISC']

In [None]:
# Re-declares the same sample text that was defined earlier in the notebook.
testDoc = '''
Peter is a very good persn.
My life in Russia is very intersting.
John and Peter are brthers. However they don't support each other that much.
Lucas Dunbercker is no longer happy. He has a good car though.
Europe is very culture rich. There are huge churches! and big houses!
'''

# Annotate the text with the recognize_entities_dl pipeline.
result = recognize_entities.annotate(testDoc)

# Pair each token with its NER tag.
list(zip(result['token'], result['ner']))

[('Peter', 'B-PER'),
 ('is', 'O'),
 ('a', 'O'),
 ('very', 'O'),
 ('good', 'O'),
 ('persn', 'O'),
 ('.', 'O'),
 ('My', 'O'),
 ('life', 'O'),
 ('in', 'O'),
 ('Russia', 'B-LOC'),
 ('is', 'O'),
 ('very', 'O'),
 ('intersting', 'O'),
 ('.', 'O'),
 ('John', 'B-PER'),
 ('and', 'O'),
 ('Peter', 'B-PER'),
 ('are', 'O'),
 ('brthers', 'O'),
 ('.', 'O'),
 ('However', 'O'),
 ('they', 'O'),
 ("don't", 'O'),
 ('support', 'O'),
 ('each', 'O'),
 ('other', 'O'),
 ('that', 'O'),
 ('much', 'O'),
 ('.', 'O'),
 ('Lucas', 'B-PER'),
 ('Dunbercker', 'I-PER'),
 ('is', 'O'),
 ('no', 'O'),
 ('longer', 'O'),
 ('happy', 'O'),
 ('.', 'O'),
 ('He', 'O'),
 ('has', 'O'),
 ('a', 'O'),
 ('good', 'O'),
 ('car', 'O'),
 ('though', 'O'),
 ('.', 'O'),
 ('Europe', 'B-LOC'),
 ('is', 'O'),
 ('very', 'O'),
 ('culture', 'O'),
 ('rich', 'O'),
 ('.', 'O'),
 ('There', 'O'),
 ('are', 'O'),
 ('huge', 'O'),
 ('churches', 'O'),
 ('!', 'O'),
 ('and', 'O'),
 ('big', 'O'),
 ('houses', 'O'),
 ('!', 'O')]

In [None]:
# Load the pretrained English 'clean_stop' pipeline.
clean_stop = PretrainedPipeline('clean_stop', lang='en')
clean_stop.model.stages # clean stop pipeline stages
# Annotate the sample text; only the last expression (the joined string) is displayed.
result = clean_stop.annotate(testDoc)
result.keys()
# Join the 'cleanTokens' output back into a single space-separated string.
' '.join(result['cleanTokens'])

clean_stop download started this may take some time.
Approx size to download 22.8 KB
[OK!]


"Peter good persn . life Russia intersting . John Peter brthers . don't support . Lucas Dunbercker longer happy . good car . Europe culture rich . huge churches ! big houses !"

In [None]:
# Load the pretrained English 'check_spelling' pipeline.
spell_checker = PretrainedPipeline('check_spelling', lang='en')
# Re-declares the same sample text that was defined earlier in the notebook.
testDoc = '''
Peter is a very good persn.
My life in Russia is very intersting.
John and Peter are brthers. However they don't support each other that much.
Lucas Dunbercker is no longer happy. He has a good car though.
Europe is very culture rich. There are huge churches! and big houses!
'''

result = spell_checker.annotate(testDoc)

result.keys()
# Pair each original token with its spell-checked form (the only value displayed).
list(zip(result['token'], result['checked']))

check_spelling download started this may take some time.
Approx size to download 913.5 KB
[OK!]


[('Peter', 'Peter'),
 ('is', 'is'),
 ('a', 'a'),
 ('very', 'very'),
 ('good', 'good'),
 ('persn', 'person'),
 ('.', '.'),
 ('My', 'My'),
 ('life', 'life'),
 ('in', 'in'),
 ('Russia', 'Russia'),
 ('is', 'is'),
 ('very', 'very'),
 ('intersting', 'interesting'),
 ('.', '.'),
 ('John', 'John'),
 ('and', 'and'),
 ('Peter', 'Peter'),
 ('are', 'are'),
 ('brthers', 'brothers'),
 ('.', '.'),
 ('However', 'However'),
 ('they', 'they'),
 ("don't", "don't"),
 ('support', 'support'),
 ('each', 'each'),
 ('other', 'other'),
 ('that', 'that'),
 ('much', 'much'),
 ('.', '.'),
 ('Lucas', 'Lucas'),
 ('Dunbercker', 'Dunbercker'),
 ('is', 'is'),
 ('no', 'no'),
 ('longer', 'longer'),
 ('happy', 'happy'),
 ('.', '.'),
 ('He', 'He'),
 ('has', 'has'),
 ('a', 'a'),
 ('good', 'good'),
 ('car', 'car'),
 ('though', 'though'),
 ('.', '.'),
 ('Europe', 'Europe'),
 ('is', 'is'),
 ('very', 'very'),
 ('culture', 'culture'),
 ('rich', 'rich'),
 ('.', '.'),
 ('There', 'There'),
 ('are', 'are'),
 ('huge', 'huge'),
 ('chu

In [None]:
# Three short texts, some with misspellings ("pioner", "wrate", "aircrast"),
# used to show annotate() on a list of strings.
testDoc_list = ['French author who helped pioner the science-fiction genre.',
'Verne wrate about space, air, and underwater travel before navigable aircrast',
'Practical submarines were invented, and before any means of space travel had been devised.']

testDoc_list

['French author who helped pioner the science-fiction genre.',
 'Verne wrate about space, air, and underwater travel before navigable aircrast',
 'Practical submarines were invented, and before any means of space travel had been devised.']

In [None]:
# NOTE(review): this loads the same 'explain_document_ml' pipeline that an earlier cell
# already assigned to `pipeline`; the reload looks redundant — confirm.
pipeline = PretrainedPipeline('explain_document_ml', lang='en')
# annotate() is given a list of strings here; the result is indexed per input text below.
result_list = pipeline.annotate(testDoc_list)

len (result_list)
# Annotations for the first input text (the only value displayed by this cell).
result_list[0]

explain_document_ml download started this may take some time.
Approx size to download 9.1 MB
[OK!]


{'document': ['French author who helped pioner the science-fiction genre.'],
 'lemmas': ['French',
  'author',
  'who',
  'help',
  'pioneer',
  'the',
  'sciencefiction',
  'genre',
  '.'],
 'pos': ['JJ', 'NN', 'WP', 'VBD', 'NN', 'DT', 'NN', 'NN', '.'],
 'sentence': ['French author who helped pioner the science-fiction genre.'],
 'spell': ['French',
  'author',
  'who',
  'helped',
  'pioneer',
  'the',
  'sciencefiction',
  'genre',
  '.'],
 'stems': ['french',
  'author',
  'who',
  'help',
  'pioneer',
  'the',
  'sciencefict',
  'genr',
  '.'],
 'token': ['French',
  'author',
  'who',
  'helped',
  'pioner',
  'the',
  'science-fiction',
  'genre',
  '.']}

In [None]:
text = 'Peter Parker is a nice guy and lives in New York'
# pipeline_dl >> explain_document_dl

# The fullAnnotate() result is indexed by document first ([0]) and then by output name;
# each item exposes .result and .metadata, as used below.
detailed_result = pipeline_dl.fullAnnotate(text)
# The bare expressions below are not displayed (they are not the cell's last expression);
# only the two print() calls produce output.
detailed_result
detailed_result[0]['entities']
detailed_result[0]['entities'][0].result
print("metadata dict:",detailed_result[0]["entities"][0].metadata)
print("entity type",detailed_result[0]["entities"][0].metadata["entity"])

metadata dict: {'entity': 'PER', 'sentence': '0', 'chunk': '0'}
entity type PER


In [None]:
# Collect the text and the entity label of every entity chunk found in the first document.
entity_annotations = detailed_result[0]['entities']
chunks = [annotation.result for annotation in entity_annotations]
entities = [annotation.metadata['entity'] for annotation in entity_annotations]

df = pd.DataFrame({'chunks': chunks, 'entities': entities})
df

Unnamed: 0,chunks,entities
0,Peter Parker,PER
1,New York,LOC


In [None]:
# One row per token of the first document: sentence id, token text, character
# offsets, POS tag and NER tag.
first_document = detailed_result[0]
tuples = [
    (
        int(token.metadata['sentence']),
        token.result,
        token.begin,
        token.end,
        pos_tag.result,
        ner_tag.result,
    )
    for token, pos_tag, ner_tag in zip(
        first_document["token"], first_document["pos"], first_document["ner"]
    )
]

df = pd.DataFrame(tuples, columns=['sent_id','token','start','end','pos', 'ner'])

df

Unnamed: 0,sent_id,token,start,end,pos,ner
0,0,Peter,0,4,NNP,B-PER
1,0,Parker,6,11,NNP,I-PER
2,0,is,13,14,VBZ,O
3,0,a,16,16,DT,O
4,0,nice,18,21,JJ,O
5,0,guy,23,25,NN,O
6,0,and,27,29,CC,O
7,0,lives,31,35,NNS,O
8,0,in,37,38,IN,O
9,0,New,40,42,NNP,B-LOC


In [None]:
# Load the pretrained English 'analyze_sentiment' pipeline and run it on one sentence.
sentiment = PretrainedPipeline('analyze_sentiment', lang='en')
result = sentiment.annotate("The movie I watched today was not a good one")

# Predicted sentiment label(s) for the input.
result['sentiment']


analyze_sentiment download started this may take some time.
Approx size to download 4.9 MB
[OK!]


['negative']

In [None]:
# Load two pretrained IMDB sentiment pipelines.
# NOTE(review): sentiment_imdb ('analyze_sentimentdl_use_imdb') is loaded but not used in the
# visible cells; only the GloVe variant is run below — confirm the large download is needed.
sentiment_imdb = PretrainedPipeline('analyze_sentimentdl_use_imdb', lang='en')
sentiment_imdb_glove = PretrainedPipeline('analyze_sentimentdl_glove_imdb', lang='en')
comment = '''
It's a very scary film but what impressed me was how true the film sticks to the original's tricks; it isn't filled with loud in-your-face jump scares, in fact, a lot of what makes this film scary is the slick cinematography and intricate shadow play. The use of lighting and creation of atmosphere is what makes this film so tense, which is why it's perfectly suited for those who like Horror movies but without the obnoxious gore.
'''
result = sentiment_imdb_glove.annotate(comment)

result['sentiment']
# fullAnnotate() returns annotation objects whose metadata is shown in the cell
# output; this is the only value the cell displays.
sentiment_imdb_glove.fullAnnotate(comment)[0]['sentiment']

analyze_sentimentdl_use_imdb download started this may take some time.
Approx size to download 935.7 MB
[OK!]
analyze_sentimentdl_glove_imdb download started this may take some time.
Approx size to download 155.3 MB
[OK!]


[Annotation(category, 0, 433, pos, {'sentence': '0', 'pos': '0.98675287', 'neg': '0.013247096'})]

In [None]:
import os
import sys

from pyspark.sql import SparkSession
from pyspark.ml import Pipeline

from sparknlp.annotator import *
from sparknlp.common import *
from sparknlp.base import *

import pandas as pd
from pyspark.ml.feature import SQLTransformer
from pyspark.ml.feature import StringIndexer
import pandas as pd
import numpy as np
import sparknlp
# Start a Spark session through Spark NLP with default arguments.
spark = sparknlp.start()

# Report the Spark NLP and Apache Spark versions in use.
print("Spark NLP version: ", sparknlp.version())
print("Apache Spark version: ", spark.version)

# Display the session object as the cell's output.
spark

Spark NLP version:  3.4.2
Apache Spark version:  3.1.2


In [None]:
import pandas as pd
import numpy as np

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; force_remount=True is passed so an existing mount is remounted.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import os
# Base folder of the mounted Google Drive and the project folder below it.
root_dir = "/content/drive/My Drive/"
project_folder = "Colab Notebooks/Project_folder/"


def create_and_set_working_directory(project_folder):
  """Make ``root_dir/project_folder`` the current working directory.

  The folder (including any missing parent folders) is created first when it
  does not exist yet, and a message is printed in that case. The working
  directory is changed whether or not the folder had to be created.

  Args:
    project_folder: path of the project folder, relative to the module-level
      ``root_dir``.

  Raises:
    OSError: if the folder cannot be created or entered.
  """
  # os.path.join tolerates a root_dir with or without a trailing separator.
  target_dir = os.path.join(root_dir, project_folder)
  if not os.path.isdir(target_dir):
    # makedirs also creates missing parents (e.g. "Colab Notebooks"), which os.mkdir would not.
    os.makedirs(target_dir, exist_ok=True)
    print(target_dir + ' did not exist and was created')
  # Always switch directory, not only right after creating the folder.
  os.chdir(target_dir)

In [None]:
from google.colab import files
# Prompt for a file upload; `uploaded` is later indexed by file name to obtain the raw bytes.
uploaded = files.upload()

Saving ADMISSIONS.csv to ADMISSIONS.csv


In [None]:
import io
# Parse the uploaded ADMISSIONS.csv bytes into a DataFrame via an in-memory buffer.
readmission = pd.read_csv(io.BytesIO(uploaded['ADMISSIONS.csv']))

In [None]:
# Preview the first 30 rows of the admissions table.
readmission.head(30)

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1
1,22,23,152223,2153-09-03 07:15:00,2153-09-08 19:10:00,,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,,CATHOLIC,MARRIED,WHITE,,,CORONARY ARTERY DISEASE\CORONARY ARTERY BYPASS...,0,1
2,23,23,124321,2157-10-18 19:34:00,2157-10-25 14:00:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Medicare,ENGL,CATHOLIC,MARRIED,WHITE,,,BRAIN MASS,0,1
3,24,24,161859,2139-06-06 16:14:00,2139-06-09 12:48:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Private,,PROTESTANT QUAKER,SINGLE,WHITE,,,INTERIOR MYOCARDIAL INFARCTION,0,1
4,25,25,129635,2160-11-02 02:06:00,2160-11-05 14:55:00,,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,,UNOBTAINABLE,MARRIED,WHITE,2160-11-02 01:01:00,2160-11-02 04:27:00,ACUTE CORONARY SYNDROME,0,1
5,26,26,197661,2126-05-06 15:16:00,2126-05-13 15:00:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Medicare,,CATHOLIC,SINGLE,UNKNOWN/NOT SPECIFIED,,,V-TACH,0,1
6,27,27,134931,2191-11-30 22:16:00,2191-12-03 14:45:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,HOME,Private,,CATHOLIC,,WHITE,,,NEWBORN,0,1
7,28,28,162569,2177-09-01 07:15:00,2177-09-06 16:00:00,,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,,CATHOLIC,MARRIED,WHITE,,,CORONARY ARTERY DISEASE\CORONARY ARTERY BYPASS...,0,1
8,29,30,104557,2172-10-14 14:17:00,2172-10-19 14:37:00,,URGENT,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Medicare,,CATHOLIC,MARRIED,UNKNOWN/NOT SPECIFIED,,,UNSTABLE ANGINA\CATH,0,1
9,30,31,128652,2108-08-22 23:27:00,2108-08-30 15:00:00,2108-08-30 15:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,,CATHOLIC,MARRIED,WHITE,,,STATUS EPILEPTICUS,1,1


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read ADMISSIONS.csv from the current working directory.
# NOTE(review): presumably the same file the upload cell saved and `readmission` was parsed from — confirm.
df_admission=pd.read_csv('ADMISSIONS.csv')

In [None]:
# Parse the timestamp columns into datetimes; values that do not match the
# expected format become NaT (errors='coerce').
# A column that is absent from the frame is skipped: the ADMISSIONS preview shown
# earlier in this notebook lists no DOB column, and `df_admission.DOB` raises
# AttributeError on such a frame.
# Bracket indexing is used so the assignment always targets a column.
for _time_col in ('ADMITTIME', 'DISCHTIME', 'DEATHTIME', 'DOB'):
  if _time_col in df_admission.columns:
    df_admission[_time_col] = pd.to_datetime(df_admission[_time_col], format = '%Y-%m-%d %H:%M:%S', errors = 'coerce')