# 문장 토큰화

In [None]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
text="I never thought through love we'd be. Making one as lovely as she. But isn't she lovely made from love."

In [None]:
from nltk.tokenize import sent_tokenize

t= sent_tokenize(text) # = Corpus

In [None]:
for i in range(len(t)):
  print(t[i])

I never thought through love we'd be.
Making one as lovely as she.
But isn't she lovely made from love.


# Keras의 Tokenizer 사용하기
- 정수 인코딩
- 디코딩 지원
- 단어 토큰화 지원(문장 넣으면 단어토큰됨)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer()

In [None]:
# 코퍼스를 이용해 단어집합 만들 수 있도록 훈련
tokenizer.fit_on_texts(sent_tokenize(text))

In [None]:
# 생성된 단어집합 확인
tokenizer.word_index # 단어 옆 숫자는 단지 '인덱스' 일 뿐뿐

{'love': 1,
 'as': 2,
 'lovely': 3,
 'she': 4,
 'i': 5,
 'never': 6,
 'thought': 7,
 'through': 8,
 "we'd": 9,
 'be': 10,
 'making': 11,
 'one': 12,
 'but': 13,
 "isn't": 14,
 'made': 15,
 'from': 16}

In [None]:
# 단어의 빈도수 확인
tokenizer.word_counts

OrderedDict([('i', 1),
             ('never', 1),
             ('thought', 1),
             ('through', 1),
             ('love', 2),
             ("we'd", 1),
             ('be', 1),
             ('making', 1),
             ('one', 1),
             ('as', 2),
             ('lovely', 2),
             ('she', 2),
             ('but', 1),
             ("isn't", 1),
             ('made', 1),
             ('from', 1)])

In [None]:
# 인코딩
corpus = ["she isn't lovely but I love her"]
print(tokenizer.texts_to_sequences(corpus))

[[4, 14, 3, 13, 5, 1]]


In [None]:
# 디코딩
print(tokenizer.sequences_to_texts([[5,1,6,13,16,8,10]]))

['i love never but from through be']


# OOV(Out of Vocabulary) 설정 

In [None]:
# 단어 집합의 크기: vocab_size
vocab_size=5 # 16개 단어가 있지만 상위위 5개만 쓰겠다

# vocab_size+2 = 실제 사용할 단어집합 5개 + pad, oov 토큰 개수까지 포함 = 총 7개
tokenizer = Tokenizer(num_words = vocab_size+2, oov_token='<oov>')
tokenizer.fit_on_texts(sent_tokenize(text))

In [None]:
tokenizer.word_index
# 결과로 나오는건 단어집합 내 모든 단어가 나오고, 실제 사용하는건 5단어

{'<oov>': 1,
 'love': 2,
 'as': 3,
 'lovely': 4,
 'she': 5,
 'i': 6,
 'never': 7,
 'thought': 8,
 'through': 9,
 "we'd": 10,
 'be': 11,
 'making': 12,
 'one': 13,
 'but': 14,
 "isn't": 15,
 'made': 16,
 'from': 17}

In [None]:
corpus = ['she is wonderful']
tokenizer.texts_to_sequences(corpus)

[[5, 1, 1]]

# Padding
- 원하는 길이만큼 0을 채워줌
- 디폴트: prepadding

In [None]:
integer_tokens = tokenizer.texts_to_sequences(corpus)
integer_tokens

[[5, 1, 1]]

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# prepadding = 디폴트
padded_tokens = pad_sequences(integer_tokens, maxlen=5)
padded_tokens

array([[0, 0, 5, 1, 1]], dtype=int32)

In [None]:
# postpadding 
padded_tokens = pad_sequences(integer_tokens, maxlen=5, padding='post')
padded_tokens

array([[5, 1, 1, 0, 0]], dtype=int32)

# Text Vectorization

In [None]:
text1 = """I'm at a payphone trying to call home
All of my change I spent on you
Where have the times gone?
Baby, it's all wrong
Where are the plans we made for two?
Yeah, I, I know it's hard to remember
The people we used to be
It's even harder to picture
That you're not here next to me
You say it's too late to make it
But is it too late to try?
And in our time that you wasted
All of our bridges burned down
I've wasted my nights
You turned out the lights
Now I'm paralyzed
Still stuck in that time, when we called it love
But even the sun sets in paradise
I'm at a payphone, trying to call home
All of my change I spent on you
Where have the times gone?
Baby, it's all wrong
Where are the plans we made for two?
If "happy ever after" did exist
I would still be holding you like this
All those fairy tales are full of shit
One more fucking love song, I'll be sick, oh
You turned your back on tomorrow
'Cause you forgot yesterday
I gave you my love to borrow
But you just gave it away
You can't expect me to be fine
I don't expect you to care
I know I've said it before
But all of our bridges burned down
I've wasted my nights
You turned out the lights
Now I'm paralyzed
Still stuck in that time
When we called it love
But even the sun sets in paradise
I'm at a payphone trying to call home
All of my change I spent on you
Where have the times gone?
Baby, it's all wrong
Where are the plans we made for two?
If "happy ever after" did exist
I would still be holding you like this
And all those fairy tales are full of shit
One more fucking love song, I'll be sick
Now I'm at a payphone
Man, fuck that shit
I'll be out spending all this money while you sitting round
Wondering why wasn't you who came up from nothing
Made it from the bottom, now when you see me I'm strutting
And all of my cars start with a push of a button
Telling me the chances I blew up or whatever you call it
Switched the number to my phone so you never could call it
Don't need my name on my show you can tell it I'm ballin'
Swish, what a shame could have got picked
Had a really good game but you missed your last shot
So you talk about who you see at the top
Or what you could have saw, but sad to say it's over for
Phantom pulled valet open doors
Wiz like go away got what you was looking for
Now it's me who they want, so you can go
And take that little piece of shit with you
I'm at a payphone, trying to call home
All of my change I spent on you
Where have the times gone?
Baby, it's all wrong
Where are the plans we made for two?
If "happy ever after" did exist
I would still be holding you like this
All those fairy tales are full of shit
One more fucking love song, I'll be sick
Now I'm at a payphone"""

text2 = """Spent 24 hours, I need more hours with you
You spent the weekend getting even, ooh
We spent the late nights making things right between us
But now it's all good, babe
Roll that back wood, babe
And play me close
'Cause girls like you run 'round with guys like me
'Til sun down when I come through
I need a girl like you, yeah yeah
Girls like you love fun and, yeah, me too
What I want when I come through
I need a girl like you, yeah yeah
Yeah yeah yeah, yeah yeah yeah
I need a girl like you, yeah yeah
Yeah yeah yeah, yeah yeah yeah
I need a girl like you
I spent last night on the last flight to you (ey ya)
Took a whole day up trying to get way up, ooh
We spent the daylight trying to make things right between us
But now it's all good, babe
Roll that back wood, babe
And play me close, yeah
'Cause girls like you run 'round with guys like me
'Til sun down when I come through
I need a girl like you, yeah yeah
Girls like you love fun and, yeah, me too
What I want when I come through
I need a girl like you, yeah yeah
Yeah yeah yeah, yeah yeah yeah
I need a girl like you, yeah yeah
Yeah yeah yeah, yeah yeah yeah
I need a girl like you, yeah yeah
I need a girl like you, yeah yeah
I need a girl like you
Maybe it's 6:45
Maybe I'm barely alive
Maybe you've taken my shit for the last time, yeah
Maybe I know that I'm drunk
Maybe I know you're the one
Maybe you thinking it's better if you drive
Oh, 'cause girls like you run 'round with guys like me
'Til sun down when I come through
I need a girl like you, yeah
'Cause girls like you run 'round with guys like me
'Til sun down when I come through
I need a girl like you, yeah yeah
Girls like you love fun and, yeah, me too
What I want when I come through
I need a girl like you, yeah yeah
Yeah yeah yeah, yeah yeah yeah
I need a girl like you, yeah yeah
Yeah yeah yeah, yeah yeah yeah
I need a girl like you"""

In [None]:
# Corpus 만들기
corpus = [text1, text2]
len(corpus) # 말뭉치가 2개

2

## DTM (Document Term Matrix)
- 문장 내 단어의 빈도

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

cnt_vector = CountVectorizer()
cnt_vector.fit(corpus)

In [None]:
feature_vector = cnt_vector.transform(corpus)
feature_vector


<2x225 sparse matrix of type '<class 'numpy.int64'>'
	with 271 stored elements in Compressed Sparse Row format>

In [None]:
feature_vector.toarray()

array([[ 0,  0,  1,  3,  0, 15,  4,  7,  7,  2,  0,  4,  1,  1,  0,  9,
         1,  0,  0,  1,  1,  1,  2,  2,  7,  1,  6,  2,  1,  3,  1,  1,
         1,  1,  4,  0,  0,  3,  0,  0,  3,  2,  1,  2,  0,  0,  3,  3,
         3,  2,  0,  3,  1,  0,  6,  1,  2,  1,  3,  3,  0,  1,  2,  0,
         0,  0,  0,  2,  4,  1,  2,  0,  1,  3,  1,  1,  6,  1,  3,  4,
         0,  3,  5,  1, 19,  1,  2,  1,  2,  2,  4,  1,  4,  1,  6,  5,
         1,  0,  1,  0,  5,  1,  1,  3, 11,  1,  1,  1,  1,  0,  2,  1,
         1,  6,  1, 12,  1,  6,  3,  0,  1,  2,  3,  3,  1,  2,  2,  6,
         1,  1,  1,  1,  1,  1,  4,  0,  1,  1,  1,  1,  1,  0,  0,  1,
         0,  1,  1,  1,  2,  2,  2,  1,  5,  1,  1,  3,  1,  3,  3,  1,
         4,  1,  5,  1,  2,  2,  1,  1,  1,  0,  3,  1,  1,  1,  6, 17,
         1,  0,  0,  4,  3,  0,  0,  3,  4, 15,  1,  2,  0,  1,  1,  4,
         3,  4,  2,  0,  1,  1,  3,  1,  1,  1,  3,  0,  7,  0,  3,  1,
         3,  8,  1,  3,  0,  1,  2,  1,  1,  0,  3,  4,  0,  1, 

In [None]:
import pandas as pd

# 단어 확인
vocabs = sorted(cnt_vector.vocabulary_.items()) # 단어 순서대로 정렬
vocabs = [ item[0] for item in vocabs ] # 단어만 뽑아내기


dtm = pd.DataFrame(
    columns=vocabs,
    data=feature_vector.toarray()
)
dtm

Unnamed: 0,24,45,about,after,alive,all,and,are,at,away,babe,baby,back,ballin,barely,be,before,better,between,blew,borrow,bottom,bridges,burned,but,button,call,called,came,can,care,cars,cause,chances,change,close,come,could,day,daylight,...,to,tomorrow,too,took,top,try,trying,turned,two,up,us,used,valet,ve,want,was,wasn,wasted,way,we,weekend,what,whatever,when,where,while,who,whole,why,with,wiz,wondering,wood,would,wrong,ya,yeah,yesterday,you,your
0,0,0,1,3,0,15,4,7,7,2,0,4,1,1,0,9,1,0,0,1,1,1,2,2,7,1,6,2,1,3,1,1,1,1,4,0,0,3,0,0,...,15,1,2,0,1,1,4,3,4,2,0,1,1,3,1,1,1,3,0,7,0,3,1,3,8,1,3,0,1,2,1,1,0,3,4,0,1,1,31,2
1,1,1,0,0,1,2,5,0,0,0,4,0,2,0,1,0,0,1,2,0,0,0,0,0,2,0,0,0,0,0,0,0,4,0,0,2,7,0,1,1,...,3,0,3,1,0,0,2,0,0,2,2,0,0,1,3,0,0,0,1,2,1,3,0,7,0,0,0,1,0,5,0,0,2,0,0,1,64,0,29,0


## TF-IDF

In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vect = TfidfVectorizer()
tfidf_vect.fit(corpus)

In [None]:
feature_vect = tfidf_vect.transform(corpus)

feature_vect.toarray()

array([[0.        , 0.        , 0.02024321, 0.06072962, 0.        ,
        0.21604811, 0.05761283, 0.14170244, 0.14170244, 0.04048641,
        0.        , 0.08097282, 0.01440321, 0.02024321, 0.        ,
        0.18218885, 0.02024321, 0.        , 0.        , 0.02024321,
        0.02024321, 0.02024321, 0.04048641, 0.04048641, 0.10082245,
        0.02024321, 0.12145923, 0.04048641, 0.02024321, 0.06072962,
        0.02024321, 0.02024321, 0.01440321, 0.02024321, 0.08097282,
        0.        , 0.        , 0.06072962, 0.        , 0.        ,
        0.06072962, 0.04048641, 0.02024321, 0.02880641, 0.        ,
        0.        , 0.04320962, 0.06072962, 0.06072962, 0.04048641,
        0.        , 0.06072962, 0.02024321, 0.        , 0.08641924,
        0.02024321, 0.04048641, 0.02024321, 0.06072962, 0.06072962,
        0.        , 0.02024321, 0.04048641, 0.        , 0.        ,
        0.        , 0.        , 0.04048641, 0.08097282, 0.01440321,
        0.04048641, 0.        , 0.02024321, 0.06

In [None]:
vocabs = sorted(tfidf_vect.vocabulary_.items()) # 단어 순서대로 정렬
vocabs = [ item[0] for item in vocabs ] # 단어만 뽑아내기

tfidf = pd.DataFrame(
    columns=vocabs,
    data=feature_vect.toarray()
)

tfidf

Unnamed: 0,24,45,about,after,alive,all,and,are,at,away,babe,baby,back,ballin,barely,be,before,better,between,blew,borrow,bottom,bridges,burned,but,button,call,called,came,can,care,cars,cause,chances,change,close,come,could,day,daylight,...,to,tomorrow,too,took,top,try,trying,turned,two,up,us,used,valet,ve,want,was,wasn,wasted,way,we,weekend,what,whatever,when,where,while,who,whole,why,with,wiz,wondering,wood,would,wrong,ya,yeah,yesterday,you,your
0,0.0,0.0,0.020243,0.06073,0.0,0.216048,0.057613,0.141702,0.141702,0.040486,0.0,0.080973,0.014403,0.020243,0.0,0.182189,0.020243,0.0,0.0,0.020243,0.020243,0.020243,0.040486,0.040486,0.100822,0.020243,0.121459,0.040486,0.020243,0.06073,0.020243,0.020243,0.014403,0.020243,0.080973,0.0,0.0,0.06073,0.0,0.0,...,0.216048,0.020243,0.028806,0.0,0.020243,0.020243,0.057613,0.06073,0.080973,0.028806,0.0,0.020243,0.020243,0.04321,0.014403,0.020243,0.020243,0.06073,0.0,0.100822,0.0,0.04321,0.020243,0.04321,0.161946,0.020243,0.06073,0.0,0.020243,0.028806,0.020243,0.020243,0.0,0.06073,0.080973,0.0,0.014403,0.020243,0.446499,0.040486
1,0.016364,0.016364,0.0,0.0,0.016364,0.023286,0.058216,0.0,0.0,0.0,0.065456,0.0,0.023286,0.0,0.016364,0.0,0.0,0.016364,0.032728,0.0,0.0,0.0,0.0,0.0,0.023286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.046573,0.0,0.0,0.032728,0.114549,0.0,0.016364,0.016364,...,0.03493,0.0,0.03493,0.016364,0.0,0.0,0.023286,0.0,0.0,0.023286,0.032728,0.0,0.0,0.011643,0.03493,0.0,0.0,0.0,0.016364,0.023286,0.016364,0.03493,0.0,0.081502,0.0,0.0,0.0,0.016364,0.0,0.058216,0.0,0.0,0.032728,0.0,0.0,0.016364,0.745164,0.0,0.337652,0.0


# 벡터화된 텍스트 정보 활용

In [36]:
from sklearn.datasets import fetch_20newsgroups

news_data = fetch_20newsgroups(subset='all', random_state=42)

In [None]:
# target 확인
news_data.target_names

['alt.atheism',
 'comp.graphics',
 'comp.os.ms-windows.misc',
 'comp.sys.ibm.pc.hardware',
 'comp.sys.mac.hardware',
 'comp.windows.x',
 'misc.forsale',
 'rec.autos',
 'rec.motorcycles',
 'rec.sport.baseball',
 'rec.sport.hockey',
 'sci.crypt',
 'sci.electronics',
 'sci.med',
 'sci.space',
 'soc.religion.christian',
 'talk.politics.guns',
 'talk.politics.mideast',
 'talk.politics.misc',
 'talk.religion.misc']

In [None]:
news_data.keys()

dict_keys(['data', 'filenames', 'target_names', 'target', 'DESCR'])

In [None]:
print(news_data['data'][9])
print(news_data['target_names'][9])

From: arromdee@jyusenkyou.cs.jhu.edu (Ken Arromdee)
Subject: Re: Christians above the Law? was Clarification of pe
Organization: Johns Hopkins University CS Dept.
Lines: 13

In article <C61Kow.E4z@mailer.cc.fsu.edu> dlecoint@garnet.acns.fsu.edu (Darius_Lecointe) writes:
>>Jesus was a JEW, not a Christian.

If a Christian means someone who believes in the divinity of Jesus, it is safe
to say that Jesus was a Christian.
--
"On the first day after Christmas my truelove served to me...  Leftover Turkey!
On the second day after Christmas my truelove served to me...  Turkey Casserole
    that she made from Leftover Turkey.
[days 3-4 deleted] ...  Flaming Turkey Wings! ...
   -- Pizza Hut commercial (and M*tlu/A*gic bait)

Ken Arromdee (arromdee@jyusenkyou.cs.jhu.edu)

rec.sport.baseball


In [37]:
# 훈련 데이터 가져오기. header, footers, 특수문자를 제거하고 불러오기
train_news = fetch_20newsgroups(subset='train', remove=('headers', 'footers', 'quotes'), random_state=42)
X_train = train_news['data']
y_train = train_news['target']

test_news = fetch_20newsgroups(subset='test', remove=('headers', 'footers', 'quotes'), random_state=42)
X_test = test_news['data']
y_test = test_news['target']

In [38]:
tfidf_vect = TfidfVectorizer()
X_train_tfidf_vect = tfidf_vect.fit_transform(X_train) # 훈련세트로 transformer만들면 test세트에선 transformer만들면 안됨
X_test_tfidf_vect = tfidf_vect.transform(X_test) # 여기선 fit하면 안됨!! 

In [39]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

lr_clf = LogisticRegression()

lr_clf.fit(X_train_tfidf_vect, y_train)
pred= lr_clf.predict(X_test_tfidf_vect)

accuracy_score(y_test, pred)

0.6736590546999469

In [None]:
news_baseball_0427="""CNN
 — 
It was one of those perfect, historic nights for Adolis García on Saturday as he slugged three home runs in his five hits and added eight runs as the Texas Rangers dismantled the Oakland Athletics 18-3.

Each homer was projected at 400+ feet, combining for an incredible 1,252 feet of home run distance. It capped the Rangers' dominant night, after they lost the series opener 5-4 on Friday, and marked a career-best performance for García as well as the first eight RBI game by a Ranger since Nelson Cruz more than a decade ago.

"It was an incredible night for me," García said through interpreter Raul Cardenas, according to MLB.com. "I didn't expect something like this to happen, but I'm really blessed and thankful for it.

"I was just looking for certain pitches, in a certain zone. I wasn't trying to do too much and not overthinking it, just trying to make good contact."

It was an astonishing night for the right fielder. He hit a two-run home run in the first, letting it fly high into the crowd to tie the score, was hit by a pitch in the second, then hit another couple of two-run homers in the third and fifth."""


test_vector = tfidf_vect.transform([news_baseball_0427])
lr_clf.predict(test_vector)

array([9])

In [None]:
guns_cnn = '''State investigators found 89 shell casings and believe seven guns were used at a dance studio shooting that left four people dead during a Sweet 16 party in Dadeville, Alabama, earlier this month, according to a report from CNN affiliate WBMA.

The new details came Tuesday during testimony from an Alabama Law Enforcement Agency special agent at a bail hearing for several shooting suspects, according to WBMA.

The six suspects charged in connection with the massacre were denied bond, according to the district attorney’s office of the 5th Judicial Circuit of Alabama.

Five of the suspects were ordered to be held without bond until the trial and are being treated as adults, a news release from the office said. A sixth suspect, who is 15, was also denied bond at a separate hearing because he is a minor, according to a separate release.

01 alabama shooting presser 0419
6 people face murder charges for the Sweet 16 party massacre that left 4 dead and 32 injured
Another new detail came to light as the investigator testified one of the deceased victims may have fired first, according to CNN affiliates WTVM and WFSA.

Corbin Holston, 23, was found dead inside the dance studio with a gun on his chest, the investigator reportedly told the court.

The party for 16-year-old Alexis Dowdell was in full swing when gunfire erupted, witnesses said. Alexis’ 18-year-old brother, Philstavious “Phil” Dowdell, was killed, along with Marsiah Emmanuel Collins, 19; Shaunkivia “Keke” Nicole Smith, 17; and Holston, the Tallapoosa County coroner said.


The state investigator testified that as many as 60 people were inside the dance studio at the time of the shooting, WTVM And WFSA reported.

Tyreese “Ty Reik” McCullough, 17, Travis McCullough, 16, Wilson LaMar Hill Jr., 20, Johnny Letron Brown, 20, Willie George Brown Jr., 19, and the unnamed 15-year-old have all been charged with four counts of reckless murder for their alleged involvement in the April 15 shooting that killed the four people and injured 32 others, including 15 who a hospital spokesperson said were hit by gunfire.

From left, Philstavious Dowdell, Keke Smith, Marsiah Collins and Corbin Holston
A pair of best friends, a gifted athlete and a college hopeful are among the victims of the Alabama Sweet 16 birthday party shooting
“The State intends to file a motion to transfer that juvenile to be tried as an adult,” the DA’s office said. “The Juvenile Court will have a hearing to determine whether or not he will be transferred and tried as an adult.”

CNN has attempted to reach a representative for the accused.

The hearing for the Browns and Hill was open to the public, while the McCulloughs’ session was closed, according to the three affiliates, which said the sixth suspect was not in court.

Authorities have not said what connection, if any, the suspects had to Alexis’ birthday party.

CNN has reached out to the Alabama Law Enforcement Agency for comment.

Authorities have not publicly discussed a motive in the killings that stunned the small Alabama city and joined it to a slew of other American communities grieving gun violence this year. The shooting is one of more than 173 mass shootings – in which four or more people were shot, excluding the shooter – reported in the US so far this year, according to the Gun Violence Archive.'''

test_vector1 = tfidf_vect.transform([guns_cnn])
lr_clf.predict(test_vector1)

array([16])

In [None]:
space_cnn='''CNN
 — 
A Japanese lunar lander, carrying a rover developed in the United Arab Emirates, attempted to find its footing on the moon’s surface Tuesday — and potentially mark the world’s first lunar landing for a commercially developed spacecraft. But flight controllers on the ground were not immediately able to regain contact, prompting the company to presume the spacecraft was lost.

The lander, built by Japanese firm Ispace, launched atop a SpaceX rocket from Cape Canaveral, Florida, on December 11. The spacecraft then made a three-month trek to enter orbit around the moon, which lies about 239,000 miles (383,000 kilometers) from Earth, using a low-energy trajectory. Overall, the journey took the lander about 870,000 miles (1.4 million kilometers) through space.

Touchdown was expected to occur Tuesday at 12:40 p.m. ET, which is Wednesday at 1:40 a.m. Japan Standard Time.

Minutes passed as the mission control team worked to regain contact with the vehicle after an expected communications blackout. About 20 minutes after the planned landing time, Ispace CEO Takeshi Hakamada delivered an update.

“We have not been able to confirm successful landing,” he said. “We have to assume…that we could not complete the landing on the lunar surface. Our engineers continue to investigate the situation.”

He added that his team was able to gather data from the vehicle right up until the attempted landing, a “great achievement” that should help inform future Ispace missions.

The lunar lander, called Hakuto-R, was carrying the Rashid rover — the first Arab-built lunar spacecraft, which was built by Mohammed bin Rashid Space Centre in Dubai.



ADVERTISEMENT

In history, only three countries have ever executed a controlled landing on the moon — the United States, the former Soviet Union and China. The US remains the only country to have put humans on the moon.

Japan’s Ispace had a different approach from prior lunar missions, attempting to land its spacecraft on the moon as a for-profit business rather than under the banner of a single country.

The company had shared mission updates on its Twitter account, including a recent photograph of Earth peeking out from behind the moon that was captured by the spacecraft as it traveled through lunar orbit.


The lunar exploration company had been bracing for mishaps. “Recognizing the possibility of an anomaly during the mission, the results will be weighed and evaluated against the criteria and incorporated into future missions already in development between now and 2025,” the company noted in a December 11 post.

If successful, the 22-pound (10-kilogram) Rashid rover had been expected to emerge from the lunar lander and spend “most of the 14-day lunar daytime exploring the Atlas Crater on the northeast of the Moon,” according to the European Space Agency, which helped design the rover’s wheels.

The “Rashid rover is equipped with one high resolution camera on its front mast and another mounted on its rear, as well as a microscopic camera and thermal imaging camera,” ESA said. “It also carries a ‘Langmuir probe’ to sample the plasma environment prevailing just above the lunar surface.”

Other lunar lander attempts
Japan’s Ispace is one of several companies that competed in the Google Lunar XPrize, which offered a $20 million reward to the company that could put a robotic rover on the moon, travel a couple thousand feet, and transmit data back to Earth.

PHOTO DATE: March 29, 2023. LOCATION: Bldg. 8, Room 183 - Photo Studio. SUBJECT: Official crew portrait for Artemis II, from left: NASA Astronauts Christina Koch, Victor Glover, Reid Wiseman, Canadian Space Agency Astronaut Jeremy Hansen. PHOTOGRAPHER: Josh Valcarcel
The four astronauts NASA picked for the first crewed moon mission in 50 years
The Google-sponsored competition was scrapped in 2018 — but Ispace was among the companies that chose to continue pursuing the mission.

Israel-based company SpaceIL was the first XPrize contestant to attempt to put its lander on the moon after the program ended. Its Beresheet spacecraft crashed in 2019 after ground teams lost contact with the lander as it approached the surface.

That same year, the Indian Space and Research Organisation lost contact with a lunar lander shortly before it was slated to touch down on the moon. Communications with the spacecraft were never regained, and images from NASA’s Lunar Reconnaissance Orbiter later revealed the crash site and final resting place of the mission.

A mission to retrieve lunar soil samples on behalf of NASA’s Artemis program, which intends to use commercial lunar landers to explore the moon’s surface, is part of Ispace’s future plans.

RELATED
China's Mars rover dormant likely due to dust

'''

test_vector2 = tfidf_vect.transform([space_cnn])
lr_clf.predict(test_vector2)

array([14])