In [1]:
import torch

In [2]:
torch.cuda.get_device_name()

'GeForce RTX 3090'

In [6]:
from sentence_transformers import SentenceTransformer, util
import os
import csv
import time

In [3]:



# Model for computing sentence embeddings. We use one trained for similar questions detection
model = SentenceTransformer('all-MiniLM-L6-v2')

# We donwload the Quora Duplicate Questions Dataset (https://www.quora.com/q/quoradata/First-Quora-Dataset-Release-Question-Pairs)
# and find similar question in it
url = "http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv"
dataset_path = "quora_duplicate_questions.tsv"
max_corpus_size = 50000 # We limit our corpus to only the first 50k questions


# Check if the dataset exists. If not, download and extract
# Download dataset if needed
if not os.path.exists(dataset_path):
    print("Download dataset")
    util.http_get(url, dataset_path)

# Get all unique sentences from the file
corpus_sentences = set()
with open(dataset_path, encoding='utf8') as fIn:
    reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        corpus_sentences.add(row['question1'])
        corpus_sentences.add(row['question2'])
        if len(corpus_sentences) >= max_corpus_size:
            break

corpus_sentences = list(corpus_sentences)
print("Encode the corpus. This might take a while")
corpus_embeddings = model.encode(corpus_sentences, batch_size=64, show_progress_bar=True, convert_to_tensor=True)


print("Start clustering")
start_time = time.time()

#Two parameters to tune:
#min_cluster_size: Only consider cluster that have at least 25 elements
#threshold: Consider sentence pairs with a cosine-similarity larger than threshold as similar
clusters = util.community_detection(corpus_embeddings, min_community_size=25, threshold=0.75)

print("Clustering done after {:.2f} sec".format(time.time() - start_time))

#Print for all clusters the top 3 and bottom 3 elements
for i, cluster in enumerate(clusters):
    print("\nCluster {}, #{} Elements ".format(i+1, len(cluster)))
    for sentence_id in cluster[0:3]:
        print("\t", corpus_sentences[sentence_id])
    print("\t", "...")
    for sentence_id in cluster[-3:]:
        print("\t", corpus_sentences[sentence_id])

Encode the corpus. This might take a while


Batches:   0%|          | 0/782 [00:00<?, ?it/s]

Start clustering
Clustering done after 4.06 sec

Cluster 1, #103 Elements 
	 How can I improve my spoken English?
	 How will I improve my spoken English?
	 What should I do to improve my spoken English?
	 ...
	 How can I increase my knowledge in English language?
	 How do I improve my English writing and speaking skills?
	 What should I do to speak English fluently and not face any problem with vocabulary?

Cluster 2, #86 Elements 
	 How can one make money online?
	 How could I make money online?
	 How do I to make money online?
	 ...
	 How can an apprentice programmer make money online?
	 How do I earned big money even online without investment?
	 What are the ways to make money working from home?

Cluster 3, #82 Elements 
	 What are the economic implications of banning 500 and 1000 rupee notes?
	 What will be the implications of banning 500 and 1000 rupees currency notes on Indian economy?
	 How will the ban of 1000 and 500 rupee notes affect the Indian economy?
	 ...
	 What are your

In [6]:
import spacy
nlp = spacy.load('en_core_web_sm')

text = 'My first birthday was great. My 2. was even better.'
sentences = [i for i in nlp(text).sents]

In [5]:
sentences

[My first birthday was great., My 2. was even better.]

In [18]:
text = '''
Any hot issues that we need to cover first before we get into the round table? That sounds like a no. You have a couple of new folks with us today. MJ. Put Juneja is gonna be covering for me and we invited him here to to. Let him get a flavor for how this meeting goes every week. Try try not to scare him off. And I I see Brian Eargle welcome Brian good. Good to have you with us. Thanks, I'm happy to disgrace the group. My presence. Well, welcome Brian and MJ. The club. I just I hate running. Just jump in quickly with a little bit of back story here that I don't know that everybody has. So Ron has announced his retirement effective the end of February 2024, so. We wanted no. 2025 actually sorry Ron. Still math challenged. I know I am. I'm big challenge to it. All seriousness runs well deserved retirement is effectively end of next month. February Mohit. We we call him MJ. He goes by MJ is is stepping in for an internally. And so he's here today and you'll see and hear more from him as time goes on. Thanks running back to you. Yeah, and MJ joined us as as a data engineer a couple years ago contractor. We converted them to full time and he's doing all the machine learning work over at IMD, so he's he's definitely got the data engineering skill set and then a whole lot more so. Looking forward to dumping all this on him. Thank you Ron. That's a great introduction. OK, well if no further ado, I guess we can jump into the round table. Aruna, you're up first. Track your own good afternoon and welcome Brian and MJ. And no matter how many people join in, I never seem to be get off the first one to start this meeting, so. That's too bad. We could fix that if somebody else wants to hold this schedule. It actually just like you got games in front of a router, right? Change your name. I think we should go by the last names you know. Anyway, so for QA this week the focus will be on the web self-service we've got the UAT entry criteria that is scheduled this afternoon. I published the report and the triage score is at 260. We're just a bit about 35 points about the threshold, but we are going to go ahead and recommend that it's a go for the UAT phase to start. And the other testing on web service. We completed performance testing and the load testing all the defects have been fixed. There is just one pending defect and. Scratch that team is looking into that. Uhm? We will rate. Read on those because we want to repurpose the purf one environment for a particular. Defect, which is the web self-service dashboard. Something which came in and we needed a dedicated environment so for the next two weeks or so per fan will be focused on getting that testing done and later on will continue with the performance testing. The other non functional tests that we did for web sales service, which was the flavors that was all completed. All the effects have been resolved. We also performed the multi browser and device testing all but one beef. After spending so web self service quality. Criteria perspective, but all good to go there. Wrapped at the usual sprints, we did have a couple of meetings and we're trying to see how QA team can kind of perform it. Additional testing on top of the regular Sprint tests and we're calling these as incremental testing. These will be going back to taking through individual artifacts instead of looking at it from the end to end perspective, so that's the latest on Raptor. Meet the usual MC 35 sprints and defect fixes in progress. We did have a couple of meetings with Lance Randall trying to see how we can get off the I 1011 because of the limited days of support over there. So we tried to see if it works with the current Edge browser. It did all we had to do was just to change some browser settings and works fine. The current version of Heirloom Works Fine, QA tested it all except two machines. Had some issues and we're going to dig into that. The next group would be with Rachel's team. I asked Rachel to call me and for the meeting tomorrow on Friday and I'll be walking through with them following the steps so we can see if it's working on all their machines there. I think after that we got a pretty good consensus on FP should be moving forward with Edge Browser any other. The releases we have one planned this weekend. Everything is ready except one defect, which we should pay completing in another hour or so. Regressions are running so it looks like it'll be a goal for this weekend. Actually, Ashley has requested a minor fix to make heat is fixed OK. Like 10 minutes ago to me. OK, so that's going to be in building. Yeah alright OK. So there we go. And then we've got the 23rd, the next, the week after that, which is going to be a full highlight deployment for the January character statements. And of course the Feb 13th point release. We've got 17 so far on the yellow band, 60 facts for High Lab, but hoping after this it takes care of most of the business escalated effects for the year end and the beginning of the year. And we plan to get into the regular regular every two months schedule. That's all I have. Quick question, I am so are we feeling confident enough or at least I'm going to propose that we're feeling confident enough that maybe we can work on the automated portion of the settings change and and use that with the next group of folks to test that bit of it. Yeah, manual thing again. No, I mean the team. The testing I have with Rachel's team. All the PS I would like to just have that completed and maybe yes I wanna do that manually still. OK yes yes. OK. Sure, thank you. Any other questions for arena? Sounds like none. OK, Brian, your turn. Yeah, I think my bullet point says it all kinda good. He got the first meeting pass come out yeah this is where we all come in. You know admit to all all the wrong we've done so. It's worse when you get to star by your name. Yeah, the star is special, so watch for that. Well, that's why Brian said he was good. He's done no wrong. We don't have time for my wrongs. OK, Carrie, you're kind. Sure, the how about coding work for 1095 is complete. It's getting deployed out on Sunday and then we're gonna have file generations and and generating all the forms and everything running Sunday evening and Monday. During the holiday. And then all the coding work for the the next release after that is already pretty much done, so we'll begin testing on that either tomorrow or Tuesday when we get back. Check you have your hand writes. Well I'm glad you finish OK. Have questions let you finish. OK, so I understand we had had a problem sending out 140 something. A mistake, mistake and emails or letters or something at the end of last year, or is that all taken care of? You know yes, so there was a hiccup during one of our batch programs with the connection to the mainframe. So when that happened, we moved all those people. The two direct bill when they should have been annuity deducted. We sent them letters we we found that population and we got them. We did a attacks recap on them which put them back in annuity deduction. So they should all be sorted. We also found everybody that term was not in that population, so we actually taxfree captain other 2000 people on payroll day. And if I understand the way we found out about is somebody called and said hey, what do you? What do you do to me or something like that is that? Is that right? They were on the output report for the errors from that batch job. But yeah, we also found out from the members contact. OK, so we did. We actually do have it. We do have a report that identifies that we had an error. That's good, that's that's where I was going with this so thank you. The next thing is, somebody's gotta actually look at it, right? So I don't know if, yeah, it's supposed to be a Business Report so. OK, yes they didn't get that far or members started calling us. Thank you. Yeah, the last thing about this is I added the the log for J upgrade to 2.7 dot one into our April please. That's it thanks. Thanks Gary. Any more questions for Carrie? Casey is not working. Go ahead. We are going we are having. Chris Christie. I'm sorry, so I just wanted to let you check you know that we are having some meetings to figure out how to prevent this from happening and happening in the future. Hello can you hear me sorry? OK, there we go. So just want to let you know we are having some meetings with the infrastructure staff to figure out how to make some adjustments so that server doesn't go down in the future. Or we know if it goes down. And then we're also working on the development side to try to put some defense mechanisms in so that or some code in so that if something breaks we can catch it. Stand up online Christy, that's where that's where I was. Yeah, thank you good. But we got those meetings underway, so thank you for working on it. You're welcome. Anymore on the high log. Topic no OK. Casey's out so Ernie that's you alright. Uh, lots of meetings. Figuring out resources. Uh, you know, there's a lot of. Opportunity now to expand some resources so we can meet our timelines. So we're looking at that. Also, Mike has got postings out well, he's he's reached out to the contractor contracting groups. He's got to see how many SB position that he's trying to fill. In case you hear anything that that my TRS tax tables were updated for 2022, but there are different than what AMPAS has the the actual annuity, payroll, and so there have been some phone calls where they're you know expressing concern because it's a bigger amount being withheld, and when they pull up my TRS and look at the withholding amount, it's a larger amount than what the calculator shows, and that's just gonna be an issue this month. Ray said he's gonna rerun the tax calculation. After at the end of the month to get it back to where it will match again. But there have been some phone calls. Jennifer and Adam are are aware just want to pass that on to this group in case you hear hear about it. So was there was the math they got correct, but the amount reported incorrect. Is that what it was Ernie? Everything they have to over withhold from the January check so that they don't get bit with their taxes at the end of the year. And so because they're gonna get another check. And so Ray has to over withhold from the January annuity to make sure the taxes all are correct at your end, OK? With the 13th check, it's just. It's the 13th check. Throws it off. At and I'm guessing we got. I think there was, you know, got the first year might have been an issue where people got surprised. So Ray is very proactive about it. To ensure that we we don't get, you know, get them in trouble with the IRS. Yeah, OK, but but we you know we weren't that it wasn't something I didn't realize what was going on and I asked Ray I'm like why? What's up your not? Your numbers are off but it's what he does to make sure that we don't we do the right withholding. And it'll get fixed in February. There's several grooming sessions going on. That's what that's why Mike is out right now. If he doesn't make this meeting, the user experience customer experience release, you know we're trying to at least give some some estimates so that Rachel can, you know, try to figure out where to slot those into the releases, so that's several. All groups are having meetings about those this week and next. I think that's it. Well, I did have a question for I want tell Runa. I was glad to see 260 because I've been telling people were over 300 on the on the the numbers so. I'll take, I'll take that number. Exactly, and I mean I think the last two triage is really helped. We knocked out a lot of defects, yeah, and we we've definitely. We've tried to move some resources back into web self service to make sure those numbers stay. You know, keep moving in the right direction. I don't know Ernie. I think you were saying 300, so we'd be happy. I was scared, I'm not. I'm not gonna lie. I'm happy with your 60. Yeah, it worked. Anything else for honey? Alright James, your turn. To January netpay change letters were delivered to Xerox this week so they can be out by the 24th. All of them should be mailed by then. Something I was talking to Aruna about. For those of you, I'm sure all the smart managers and directors know. But be careful when you throw about the term contract to hire. According to DRR, this concept does not exist and you can get your hands slapped. So it's best just to say something in the request for for our. For contractors that this position has the potential to transfer to an FT position in the future and would fall in a salary range of blah blah blah. That's fine to say what you can't say is we intend to hire this person after six months blah blah blah. I was pretty explicit in the one that I sent out trying to avoid any confusion or hurt feelings down the road and evidently ****** somebody off and they reported to the IR and they got in touch with. PNC and PNC had to call me in and it was going to be a nightmare and basically I had to cancel that request and submit another one which took all of, you know, an afternoon. But anyway, don't do it. Be careful what you say. We're not allowed to do that. We can certainly make a decision to hire him later, but we can't tell him we're looking for somebody to hire. Can you give us that exact text and language 'cause I just did a solicitation yesterday and had to come back and re track and do stuff. So I just wanna make sure that I'm following PNC guidelines. I think I think what I've gotten but and my my agreement with PNC was they'd see it before I submitted it. It's just a good practice. Let PNC have a chance, run it by them, say the city problems with this request, but this particular one where I say this position has the potential to transfer to an FT position. I've used that many times and never gotten into into any trouble. The reason I did get into a lot of trouble was I was very explicit, but saying if you don't want to, if you don't want to let us have this person like in the future, then don't submit him 'cause we're. That's what we're looking to do and and that got me in trouble. Those little 2 blocked two forward gave him too much information about our plans and somebody that didn't get the didn't get the. That didn't get the contractor complained that I was doing this, and I'm pretty sure I know who it is and it doesn't matter. It's not the point I shouldn't have done it. And Billy, you have your hand up, Sir. Yeah, I just want to add on. Thank you very much James for going down this path. It's it's definitely a very valid point and and want everybody to understand this right now because we're in a spot where we're going to be wanting to to be positioned to be able to hire some of these people. The other thing that I, I guess I just want to add on is several of us. You know we have direct contacts with these vendors and we're talking to them. And then most of them already are aligned with us. That after six months there there free game for us. But some of them are not aligned with that, and that's where we just want to to agree. We can make sure that we're having conversations with the vendor and letting them know we don't want to see anything so much in again, in the initial solicitation 'cause it'll just add confusion to it. But most of our main ones that we have, they know after six months we can go in, and we also want to make sure though, that as we're doing this that we are letting that vendor know that we want them to be a viable. Candidate after six months and otherwise, that means we in the case of visas or in the case of salary ranges, we do wanna have some of those open discussions early on so that I know James, you did that quite a bit, but to make sure that we had people that we could really higher. Great topic, I think. Sample also might have something to add to it. Well, no. I I had not heard that before and I am guilty of saying contract to hire when I know that that's my intention so that there isn't confusion up front. So that's good to know that. We're not supposed to say that before. Yeah, the the big thing is is it's important that the defender knows or it's in our contract that we have the ability to do so. If we want to, so that's what we've been stressing. I think the line obviously is on the position that we're saying anything that implies that we're guaranteeing employment. They really the the main point is that we just have the ability to hire if we choose to after six months. So I think that's excellent. Points. Yeah no, I think I said contract to hire if the candidate you know meets meets you know expectations and criteria, but doesn't sound like that's even OK, right? We do I. I would run it by I would run it by PNC. I'm sure they would be happy to see it. Because I basically told them. You have to have that. I said they have to be able to be hired by TRS, so you have to have a green card or US citizenship. And evidently that ruffled feathers so you you just have to be really careful with what you say. We're not I. I basically told him I'm not going to look at anybody that doesn't have these. These two things because I can't hire them. So it I I could send everybody the the beautifully. I mean, it was a wonderful email. It was in a pistol on what we were looking for and evidently. It it it rubs somebody the wrong way so I I think I send a room of the example of what not to send and she can share that. I don't care but anyway I just I. I really love that email you titled it what not to say and that told me everything what should be said yeah careful I'm basically I just went and removed everything from there except for one little reference the one that I left in here and it's just it's I've just heard that that title. Contract to hire a few times and I know I've been burned by it, so I didn't want anybody else burned by it. Where did my screen go? Is that just me? It's just you. Yeah, the word rap stopped all of a sudden. OK, thank you. Yeah, that's what I'm looking at. I'm like where did all my stuff go? Glider, but it's gonna make it smaller anyway anyway, Jake doesn't. Thanks. Thanks for sharing that, because there's a lot of us kind of going through that right now and so we appreciate. Yes, you give us. And I I can I. I took the bullet for the team and got my hand slapped and I'd be happy to to to say I'm I'm happy to tell you. Send it to PNC 1st and tell him James told us told you too so you don't get in trouble. So we had a problem with the James real quick. This is Kyle real. Two quick things related to that. First of all, let me just clarify. It was one of our our contract vendors that complained to our PNC department about this. That is that true? Yes, that's that is that would be my understanding. They complained. The DIR&DIR contacted PNC. They they probably showed him the the the request that I sent out and complained and dirl got mad and said we we don't see how you could award this contract based on the fact that blah blah blah I could. It was an ugly deal. They said we couldn't award the contract even though we had already selected somebody, so I had to. I'm assuming it's that same guy that's complained before about things. Yes, I'm so sorry that that would be my assumption too. I used to be yeah. Anyway, actually I think it's in the contract. There's something that contract that we're not supposed to do that, but we do, but we just can't publish it. So they said we couldn't award it, so we had to cancel that one and reissue another one. And I did and. Fortunately we got the same person and magically and we were able to hire them and everybody had to move on their go on go about their business. We get to choose what vendors we send these to. That's right. There's there's a number of vendors that are ready to work with us, and there's one that I'm aware of that does not, and I think we should quit sending it to that person. I. I think if you if you have any intention of contract to hire, you should not send it to that person. You're being very kind, James yes, Sir, with blunting that so. Anyway and then and then, the second quickie was the language that you used on the edited version is what you put into one note here. That's was Jackie. Jackie has that question specifically right? That's the one I've used plenty of times and never had a complaint. I'd love to see your what not to send the email. I will post it. I don't like you, just need to you know publish that just for how awesome it was. It will be shared. It will be shared in here as soon as I get through talking I will go grab it. Share it out here. We had about we is that it move on to the next topic. So we had we had an issue with the caps to seesawed inbound file. We weren't comparing enough fields on the incoming caps file to recognize that a change had been made and so those extra fields work compared in the file history straightened itself out. Uhm? We have another issue with death claims that popped up this week where if an employee does not have a district and Member records. Yes, the old legacy Member records then the the report of death claim will fail the automated part. Because it can't find a district on Legacy member records and basically this would affect anybody that came into employment after trust go live because we stopped updating member that for Member records. It does get updated. Unfortunately it gets up gets updated on the account closure which is after the report of death. So there is a solution that I've offered up to Rachel and we'll see where it goes from there. Would require us to write a little ZAP program. Basically that could be reused over and over. We could use parameters. To say, go find this, go find this taxpayer number on member records. Insert this district number and those could be done via parameters, so it would be a really simple thing. But of course the death claims people would have to check first to make sure that there wasn't one there before they submitted the report of death. And I believe they do because this was a new person submitting this, and evidently the rest of the team knew there's a manual work around when somebody has joined, say, in 2018. For 2019 and then died and their their member records doesn't have it. I think it's something we should do because there's no reason this shouldn't be an automated process when we could probably spend a few hours of work and and make it an automated process. But that's it. I think I can't see the left. I think that's all I wrote. Any questions? Good, I'll go look for my email. Thank you. Thanks, James. Mike is out. He's got a conflict. Ram your next pretty much we are working on that you a TWSS fixing defects and trying to bring this course down. Also getting ready for the next week thing and wrap that we are working on the regular sprints and the increment test effects. That's one thing and pretty aggressively working on a lot of mapping things with legacy decoration actually. So we have been engaging with the ESP. Team legacy team on their low P team. So a lot of discussions are going around and payroll code merge is still in process. Still we wanna see the application up and running pretty much near still not complete yet. That's all for me. Any questions for Graham? No. OK, Rachel is out of Randall your turn. Sure, so I tried to put a little more out this week and last one of the things I just want to get everybody a heads up on. We're going to be kicking off some kind of Active Directory. Clean up over sometime in the. The not too distant future. This is mostly going to be organizational. Rearranging things we've got several years of of of. Try not to use my word craft, but craft that has built up over time. So we just kind of a cleanup effort, will open a change ticket, will will give some kind of heads up before we do it. The vast majority of things we should have zero impact on, but the the pads for where these groups live might be changing. And if you have an app that's sensitive to that under your purview. It might be something you wanna give us a heads up about, but I wanna come on that later. I just wanna give an initial heads up if you wanna share it with your teams that that'll be coming or reach out to me. If you think you have something that can help you help you look. Wanted to mention in in Azure land we we have the first of our the first Azure Key vault and the first Azure Data Lake have been created. They are in the process of being handed over to to the the data teams to start. Playing with them, these are just proof of concept. First cuts, so nothing too exciting. But this is the first step for progress on the journey, and some of those, so that's good. Uh. We're also starting interviews and have multiple open positions that our candidate polls actually look better than what we've seen the last few rounds. So pretty exciting about that Richard and and has been doing an amazing job reaching out and and contacting people on on LinkedIn and stuff for us trying to drum up some some interest so she's been a huge huge help partner, but but the pool is looking good, so we'll see. We'll see if we can find some good candidates. I wanted to mention I threw it out and chat when I reached out to the GSG contracting company. This is the one that we have to to run through. It looks like our previous guy Sergio is no longer with them or something. I haven't updated the word doc yet. I'm gonna reach out to the lady that responded to me. Looked like his old email got forwarded over to her when I reach out to her and just confirm that she seems very new. I didn't know if anybody has worked with her. Her name is Jennifer. I can't remember. No. I I Randall talked. I had the same experience where I sent it to Sergio and got an email from her and she actually has a but I have worked with her. She has a form that she wants me yeah so and that's what she that's what she gave me to wear. Sergio would just say hey the caliber of kind of candidate you're looking for is not really what we have in that. Yeah cheese cheese first she wanted the P. Oh and now she's saying Oh yeah won't do a P. Oh, here's a form it's OK so we might wanna partner up Jennifer I'm I haven't talked to her. Yeah, have you talked to her? I did. She called me and I didn't. I picked up and talked her OK. Could you kind of allude to kind of how things used to work and it was a little smaller. Well I I told her that we don't do. We don't give pose at this point until we find someone she's good with that she's apparently heard that from several state agencies and she does want us to fill out the form. Alright, I I'm like OK her little bit to see if I can get a little more info so but OK I'll I'll I'll touch base with you so I just want to give everybody a heads up. Did she confirm the Sergio is gone gone? Yes, yeah he's speaking. Take him off. OK, so I'll I'll go ahead and pick the the contact info with her info and I'll just see if I can get a little bit more from her. I also just mostly share the form so we can template, eyes it for the group here. Yes, I will get like it's it's kind of nasty, it's just it's. It's like a it's like fill out one of the job duties. Water the there's not gonna be a lot of temptation but also the format their template template eyes what we can't OK sounds good thanks I didn't super look at it. I opened it up and there was way more. And I was hoping to see so. We got a hand up and I will have my little list going. That is me, Randall. In my spring. A quick question. We have a a request out to your team to build out the development. Ssas server for the HIPAA data. Any kind of ETA you'll net. There is a little bit of conversation going on exactly how we want to build that out exactly. Zach and Brian and I have had a little bit of conversation on it, but we could go a couple of two or three different routes in SAC. I need the pin you on that I think for kind of one of the final pieces so. OK awesome, thank you. Yep Yep. Yep it's in. It's in. The turn. Did you make a decision on the audition? Not yet. You can use developer version, that's what I assumed. With that in mind, I think I we're we're converting all the other ones to developer, yet or not. But anyway, so yeah, so I think we're like only issue with that. Be that if the prod one is not enterprise there could be features introduced in the developer edition like Development Standards, Development and QA that would not exist in crime. That was the only concern. Yeah, that that that makes sense. You know, if if we've got production servers running non enterprise, better be a very special use case. So I was on the conversations that are going on right now. Yeah yeah. So yeah, yes Jackie it is. It is still definitely in the queue. Perfect thank you. I did want to add to. James was saying earlier a little bit. I would be very careful with our language based on lessons learned by change. Again, thank you. Sacrificial James. The I wanted to throw out there though that when I you know the contracting headhunters always want to have a quick meeting with you to talk about things. Oh, they've been. That you've been asking, are you very, very sure that this person has to be? You know, within driving distance and they're there? No, the couple laptop to have our emphasizing how hard it's going to be defined. Candidates based on our not narrow criteria. So just throwing that out there for just if it's worthwhile to anybody moving on. I know I got a lot of stuff. I'll skip Mac if you don't. Nobody cares. We've got a memory leak on the domain controllers. I just I. I feel like that one might be worth bringing up just a little bit so so are you guys. So in this case when you work with vendors, what you're gonna be that means your contract with somebody. You're telling them that they need to be where they can come into Austin or come what what's? So yeah, yeah, so so we're using that very careful language that that you know James alluded to write. This position may turn into one yadda yadda yadda, and as such we need someone that can fulfill those needs should that happen. So our criteria is. Right? OK so honey. Alright, I guess I need to know that too if that's how every across the board we're proceeding that they need to be close enough to be able to be coming into the office. That's why maybe Crystal pipe in here. So that's why. OK, because I'm getting the development side. I am not saying that yet, I'm I'm again trying to stay in the state of Texas. So that's yeah, go ahead Chris. Yeah, no, that's that's exactly right. I mean, since most of these positions we hope to be able to find someone through the process that we would actually bring on board. Right now, TRSS stance is pretty much it needs to be in Texas, as you alluded to, so if they're already outside of Texas, you already creating an issue. If you wanted to bring them on, and so to me, that's more what it's about. You know if you had a short time contractor that you were not going to bring in as FTE and they were working as a contractor outside the state, and if that still makes your needs. I can get that approved, but those would be a special use cases for those that we want to hire. It would be in Texas. The other thing I just wanna let you know we are hearing throughout the entire agency this issue about coming to work and building to work from home and percentage of work from home and we are hitting that pretty hard trying to maybe loosen our stance a little bit on that as it as it makes sense for the organization we always got to be able to justify it in. This is how we can get the best people and do the best work and so that's always got to be. The first thing on my mind and however you craft weather a position you want to bring in and where they need to work and how they need to work. But just so you know, we're hearing that and and trying to work through the possible changes in the future on that. Billy, did you have some more? No, no, that's you covered it. I just just want to be ready to respond just in Case No, glad you did. That's good. Good stuff. Alright, that's very good information to do so. Absolutely yeah. Everything doesn't matter. Red River data center. We, the team did a lot of work cleaning it out. It is cleaned out. We got all the valuable stuff that we need out so we're about to turn it over to facilities to fully do with it. What they what they're planning to do. So if anybody wants one, will ask nostalgic walk through the data center. I don't know what their timeline looks like exactly yet, but. It is ready to be handed over to them. The only other thing I wanted to touch on is. The woes of purchasing, pricing, and lead times for anybody that's not actively involved in some of that purchasing at its own story, I'm gonna touch that 22. Too much, but that that's coming with its own burden. Right now, we're seeing major uptick sand pricing. The last servers we bought had about a 50 or 60% price increase from what they were September Ish of last year. So huge huge uptick in. We just got wind that they're about to do another price Rev on hardware at the end of January to to to combat some of the, I guess supply chain. We're also hearing, you know 6 to 8 month lead time on on hardware. So, so the trickle down of hopefully we're getting to the end of the major impact of COVID. We're still suffering, and I think we'll be seeing some of the trickle down impact of it for a little while to come, so it's it's really starting to. To seep into the the day-to-day, at least on the infrastructure side of the House of what we're seeing with our with our vendors and stuff. So just throwing it out there. It's just kind of interesting this. Or it's interesting to me so so wanted to share that it's non non non trivial amounts of change on pricing and stuff so Cisco least just went up to with the delays in PNC. 12 it leaps through networking component is the core and Jay went out of purchasing the 2030% because of the delays with PNC. It was it was closer to 450 ish to 60%. They didn't get it out the door in time and so when we re quoted it, it went up about 50 to 60% on just standard switches. Nothing fancy. So at 8 to that months to get it and now it's gonna be 8 to 9 months to get it. That's good impact. A little bit of El Paso Paso been ordered yet and we may have to buy. Maybe not exactly what we want and then replace it. We just I would appreciate y'all tracking this. This is great information 'cause this is also something we're trying to continue to keep the EC updated on is all the supply of train up the supply chain issues as long as as well as the cost because we're seeing it everywhere. No surprise to anybody, but I'd like us to. I'd like to be able to be a specific as possible so when you see those individual ones this is going on is is a great example. If we could note those and just kind of let. Let us know when. May try to pull something together so we can compile that 'cause this isn't the last step and I think we're going to see some impacts down the road. So thank you absolutely, absolutely and Christy. You have your hand raised. Did you have something else or is there? No, it's just gonna ask. Does anybody have any advice on how long it's gonna take to get these contractors on board that we're looking for? Should we go ahead and enter the Rex now as we're starting the solicitation process? Yeah, I was. I'm just worried it's going to drag out too and we're going to lose people. They require they require that per 63 though, right? That's yeah that Lt even said to us today we did a training and requisition to you have to have your PR 63 for contractors. Well that's got their names and stuff on it. Yeah, so that's really long. Yeah, that's late in the game, yeah? Training tomorrow, so I was curious what you guys have learned on timing. I mean, we want yeah we need to talk this through a little bit more. We do. We want to get those in as soon as possible. Maybe somehow we need to talk to Lt about how we can put in some kind of a placeholder or earmark something that. Plus I'm required that name and the exact rate or something. Yeah no, I agree if I can help with that let me know if we need to huddle with with Lt or Martin or even answer that starts becoming a problem over over this group let us know. Sounds cool, and that's all I had. Sorry for. Good discussion. So sorry for the margins. Someone widened the margin so I'm pulling them back in. Hopefully everyone can see the the the all the text now. And as Randall mentioned, some of the Azure Data hub building blocks are coming together. Thank you, Randall. I'm sure was out, but he's he's back mostly now and will be available if you need someone to rundown research or or implementation details or stuff like that. So as as he comes back up to strength, hopefully he can move things forward with y'all. Data warehousing projects. Continuing, we're getting a lot of additional code translation entries for the the financial data warehouse. Manisha is working those into the Q 80 process. Health claims both the CVS and Blue Cross dated. The subjects are are scaling up. We're we're gonna try to load a years worth of data and see if the design for the warehouse performs well with the junk dimension and and the way we split off the the person dimension. Health claims has a couple of other requests to us. We have a new data provider, MEDISPAN, and we've set up a a pipeline to pull that data in and they'll be working on the ETL for it. And then, as we discussed earlier the the SSAS tabular model is is getting the infrastructure put in place and then they'll start building power BI reports on it. And we're continuing to integrate with all our new team members. At the same time, keeping all the balls in the air, Jackie is really helping with with the release management front on health claims and and soon on FTW so. Looking forward to that. And I I introduced him Jay before he's he's a really smart guy and I appreciate him jumping into to cover for me at the end of February. So don't don't scare him off, we need him. That's it for me, any questions? OK, looks like Tim out Tom your turn. Got it first. I got good news. We did find a contractor. He'll be joining the middleware team. Matt Riley. His target start date is 27. Can get through this paperwork. I just I'm plowing through page right now. Getting a PR and everything all lined up. Dev OPS. Highlight reporting we we had a great meeting today with Christy and Jane and Kerry. I'm kind of going through the they're planning for the migration to GitHub and their training and stuff like that excellent meeting Michael did a good job project commanding that following that project, I'm just going well. The TIFF to PDF is running a little bit slow, you know, with the Christmas vacation and Steve Bradley office right now for personal reasons. We had a target conversion. Of the January 18th that might be pushed back up for five days where the 218 testing right now. But we have about a million documents that we've converted over and I think look pretty liminary looks good right now. Just moving on to the OE digitization. Had a meeting with him. We have a weekly. We have actually have weekly meetings with them, just really building up all their documentation classifications and all. That's all going to work. They're moving, moving up room for the fiscal scanner right now, and will. We've done some lab tests, done scanning, going on in the next couple weeks. Open text we are. Reviewing the statement of work with Open Decks Group, we went through the licenses and professional services ethnic. There's some gaps there, so we're doing some gap analysis and trying to get a kind of a really clear way of how we're going to do the upgrade and what our licensing costs for the future, our maintenance, and what that make sure we have the right count. So it's we're kind of we have a lot of questions that we kicked back to the open text in Will does another meeting next week to go over those extra. Country again just focused on adoption and training and Liam I think is going to be working with Christy and her team. Getting her team drained up. So that's moving. And then we've been talking about this finalizing standards. I I do actually have a meeting with some plants this week to finalize a go over that with Brittany Group and that's it for me. Any questions? Awesome news on the contractor. Yeah, it came from holster so it's as they go. Kinda seems like they're hemorrhaging over there with them. Yeah, yeah yeah, that's a good mention. If you guys know anybody at Whole Foods that's on unhappy, they might be more unhappy with the Amazon merger. And there's a little little uncertainty about where they might land, so we might be able to take advantage of that uncertainty. Come to TRS, where your job is safe. Yeah, good point. Thanks Tom Zach your turn. Hey, so vestment? We've been in a crunch with the B and Sunday shall abbys back Tuesday. Sandesh just confirmed today. It'll be back next Thursday, so that's really great. Betsy is also out starting this afternoon until 119 so I can see a light at the end of the tunnel for a full team support. That'll be really nice production stuff. Still have a few missing data for the 12:31. Basically it was originally scheduled as a Holiday Inn. So Steve St and ourselves miss some feeds. Still turnover. Cover some things that got missed as a result of that stuff. Here. Very weird discrepancy with the training was actually on for that day and we need to also get in Juneteenth holiday that might impact some other systems. But as a reminder, that's a new federal holiday that will need to plan for that. Not expecting to get certain data feeds on. Consensus is a kind of our critical item at the moment that we are trying to get our one build application D tap off of it. It does look like we'll be able to get that done by tomorrow. I'll keep posted if that is looking another way, but really help. I really wanna thank David for really quick help getting the Bloomberg Terminal setup. We transferred it temporarily over to Johnson in order to do this work and then send Dash will finish out the item. On phone apps on next week. I I saw this as this was on course radar two. We just briefly mentioned it, but the axioma we keep having problems nearly every day with the loads going on there. I think it's on the radar, but we're seeing potentially 1 downstream thing that might be an opportunity for performance and permit, but just gets kind of been a little bit of a different thing every single day. Hopefully start seeing some results of those investigations soon. Development you know. In addition to things I think already touched on the call, get some big changes coming to our conceptual model after private marker review, a lot of consolidation and simplification. So excited by that and then distributed again for some more people. And you know, can't master business as usual. We have put our broker development work on hold and we were planning on putting all the breakout accounts into. Timemaster, we're no longer planning to do that, so that'll save us some time, but I'm still trying to figure out what the best way is to identify when those are missing and some issues related there. And continued to to design liquidity dealer house. Yeah, so. Bought lot trying to get done. Be nice to have full team back to keep moving forward. Any questions? OK, thanks Zach. Jennifer, your turn. Hey everyone, so Billy and I are working on the February board presentation and that will include a. Analysis Results and so that's my second item. We're continuing to work on the team schedule and resource analysis, and that's across the reigning remaining road map increments and immani under me. We're looking at what can bring up benefit SL, a benefit Slas that have taken a big hit and also what we can deliver to our external customers. All of that will be included in the board presentation as well as a web self-service demo. And Adam will be doing the the demo of web self-service. At least that's the plan for now, and we've got a meeting with Miss Disney tomorrow to talk about team topics as well. And then just on the SharePoint team side of things, there's a new alms buds page that allows members to submit a complaint. We just had a meeting, Rachel's doing a technical analysis on what phase two of that would be, which would include a workflow. Probably I think we're thinking in CRM, so all of that's being put in the backlogs will be estimated and we'll see where and when that might fall on the road map itself for ServiceNow wrapped up ServiceNow project requirements today, so that's more traditional waterfall project management, and we had a agile demo. Agile requirements start tomorrow. We are doing that in two tracks. One track is really. The Scrum masters in the VA's because it's all about set up work and backlog set up, and then the second track will include the larger group with with many of you in it to look at planning and execution and monitoring of the sprints. And then of course also reporting. Change management requirements are in process and service now as well. We've actually got a meeting this afternoon. Portfolio management will begin in the next two weeks and we'll be doing a demo of portfolio dashboards to CMT and Epoch in February and then also staffing stuff. Looking for a PM to cover data Recon and data migration teams. I'm actively working with contractor contracting companies on that and good information. We have found a contractor. Service that I am. D Slash EDM product owner slash VA so his name is. His last name is Rudy. I remember that. I'll have to look and remember his first name, but I remember the last name being Moody that that person will live on Rachel's team and we're trying to get them on boarded and started in the mid in mid February range run. So we're going to have new team management now. What we're going to have Moody management for for EDM? Yes. Moody. Many I like it. Yes, beauty management. That's good news. Thank you. Yep, that's all I've got. Any questions for Jennifer? Lance, your turn 3 minutes left yeah, 3 minutes left. I y'all can read my there's any questions let me know. I wanted to give Chris or Donald chat still here. If they had anything. I grew yelken read mine too. Any questions, let me know. Save the music. Same for me. OK, the only thing I had to go. That I had to drop for the get ready for the next one, but I I do think this is probably the best meeting that we have each week. And then, and you guys did a great job. Really appreciate it. In fact, I, I guess I'll just go skiing for the next week and I will start. Thank you guys gals. Yeah but you gotta come back healthy, don't break anything. No extreme skiing now. I'll just take the next minute. Just real quick one. I did want to let the team know I'm I'm planning on canceling the All hands meeting that we're having next week. I really hate to do that in this new year, but I also would like to have it where we have a little bit of an audience 'cause I really want to use it as a time to bring in some breakfast tacos and and and meet some new people. So I'm gonna send out a cancellation with the hope of getting that back on the calendar pretty quickly as we start seeing about. The COVID numbers and so forth, which I hope it's only a short delay, but I want to make this trip aware and if you have any concerns about me doing that, I also want to know because I'm not sure if there's there's some. Hot topics out there that would be good for me to have a meeting with the entire division on. That's my current plan at this point. Also certainly wanted to say thank you for all those that made the 13th check happen, or the supplemental payment ray and all those that worked with him. It's just been fantastic thinking about impacting. You know 430,000 members. And I believe the cost was something like $700 million that we were able to pay out to our retirees. I think that is fantastic and there was a lot of work. Here in and appreciate it, a lot of work going on. I appreciate all your help and what you're doing with the FT in planning because we are building board presentations to really help push the point of the resources that we're going to need to improve service delivery. And Billy, I may have interrupted you. Did you have something? No OK, and of course we are working on merits over the next couple of weeks as well, so we're excited to get those moving this year and I think that's probably on my list in the end of the one minute I had. Thanks everybody for all that you do. This is a great meeting. I could not agree more with chat and we'll talk to each of those soon. Thank you, thank you. Thanks all right on time thanks bye thanks. Thanks Ron for leading. 
'''

In [19]:
sentences = [i for i in text.split(". ") if len(i.split(" ")) > 3]

In [8]:
sentences[0]

'\nBeing leadership meeting yesterday, I think that would be the the corporate updates, so I already mentioned the FTE'

In [7]:

# Model for computing sentence embeddings. We use one trained for similar questions detection
model = SentenceTransformer('all-MiniLM-L6-v2')

# Get all unique sentences from the file

corpus_sentences = sentences[:]
print("Encode the corpus. This might take a while")
corpus_embeddings = model.encode(corpus_sentences, batch_size=64, show_progress_bar=True, convert_to_tensor=True)


print("Start clustering")
start_time = time.time()

#Two parameters to tune:
#min_cluster_size: Only consider cluster that have at least 25 elements
#threshold: Consider sentence pairs with a cosine-similarity larger than threshold as similar
clusters = util.community_detection(corpus_embeddings, min_community_size=6,  threshold=0.75)

print("Clustering done after {:.2f} sec".format(time.time() - start_time))

#Print for all clusters the top 3 and bottom 3 elements
for i, cluster in enumerate(clusters):
    print("\nCluster {}, #{} Elements ".format(i+1, len(cluster)))
    for sentence_id in cluster[0:3]:
        print("\t", corpus_sentences[sentence_id])
    print("\t", "...")
    for sentence_id in cluster[-3:]:
        print("\t", corpus_sentences[sentence_id])

Encode the corpus. This might take a while


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Start clustering
Clustering done after 0.01 sec


In [10]:
corpus_embeddings

tensor([[-0.0837, -0.0670,  0.0436,  ..., -0.0422,  0.0232,  0.0802],
        [-0.0796,  0.0007, -0.0557,  ...,  0.0113,  0.0807,  0.0460],
        [ 0.0203,  0.0548,  0.0305,  ...,  0.0633,  0.0602,  0.0833],
        ...,
        [-0.0045, -0.0902, -0.0298,  ..., -0.0204, -0.0385, -0.0455],
        [-0.0700, -0.0020,  0.0966,  ..., -0.1205,  0.0096,  0.0213],
        [-0.0098, -0.0005, -0.0096,  ...,  0.0853, -0.0511,  0.0523]],
       device='cuda:0')

In [20]:
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans

embedder = SentenceTransformer('all-MiniLM-L6-v2')


corpus_embeddings = embedder.encode(sentences)

# Perform kmean clustering
num_clusters = 25
clustering_model = KMeans(n_clusters=num_clusters)
clustering_model.fit(corpus_embeddings)
cluster_assignment = clustering_model.labels_

clustered_sentences = [[] for i in range(num_clusters)]
for sentence_id, cluster_id in enumerate(cluster_assignment):
    clustered_sentences[cluster_id].append(sentences[sentence_id])

for i, cluster in enumerate(clustered_sentences):
    print("Cluster ", i+1)
    print(cluster)
    print("")

Cluster  1
['You have a couple of new folks with us today', 'Good to have you with us', 'I just I hate running', "Just jump in quickly with a little bit of back story here that I don't know that everybody has", 'I know I am', 'Thanks running back to you', 'Yeah, I think my bullet point says it all kinda good', "You know admit to all all the wrong we've done so", "We don't have time for my wrongs", "Well I'm glad you finish OK", 'Hello can you hear me sorry? OK, there we go', 'You know, keep moving in the right direction', "I was scared, I'm not", "I'm not gonna lie", "I'm happy with your 60", "But anyway, don't do it", 'Be careful what you say', "It's just a good practice", 'Thank you very much James for going down this path', 'I mean, it was a wonderful email', "I don't care but anyway I just I", "I really love that email you titled it what not to say and that told me everything what should be said yeah careful I'm basically I just went and removed everything from there except for one

In [44]:
clustered_sentences

[['A man is eating food.', 'The girl is carrying a baby.'],
 ['A man is eating a piece of bread.'],
 ['A man is eating pasta.', 'A monkey is playing drums.'],
 ['The baby is carried by the woman',
  'A man is riding a white horse on an enclosed ground.'],
 ['A man is riding a horse.',
  'Someone in a gorilla costume is playing a set of drums.',
  'A cheetah is running behind its prey.',
  'A cheetah chases prey on across a field.']]